All of lore.kernel.org
 help / color / mirror / Atom feed
* [Adeos-main] [RFC][PATCH] Reduce pipeline_head access overhead
@ 2008-02-27  8:29 Jan Kiszka
  2008-03-25  8:17 ` Philippe Gerum
  0 siblings, 1 reply; 2+ messages in thread
From: Jan Kiszka @ 2008-02-27  8:29 UTC (permalink / raw)
  To: adeos-main


[-- Attachment #1.1: Type: text/plain, Size: 1392 bytes --]

This patch is in fact part of my xnlock rework (updates on that one will
follow the next days). I noticed that the complexity of
ipipe_cpudom_var(head_domain, *), normally inlined, is fairly relevant
for the text size of Xenomai and may thus impact the WCET as well.

So I applied the same pattern as already used for ipipe_root_cpudom_var
also to the head domain, ie. assigned a fixed slot, the topmost, in
ipipe_percpu_daddr/darray to the head domain. This gives a nice text
size reduction as we no longer need to go via
__ipipe_pipeline.next->slot (2 memory accesses, even 3 on x867/SMP +
some arithmetics) to look up the address of per-cpu domain variables
like "status".

The drawback is that this introduces an inconsistency to the
ipipe_*_pipeline_head API: It no longer works for head == root. While
this is most probably irrelevant in practice, it remains ugly. I already
thought about renaming the modified services (and deprecating the
original ones), but for now I would like to focus the discussion on the
optimized services.

The patch already survived quite some testing on various real and
virtual boxes with Xenomai. Latency reduction is hard to estimate as
such comparably small optimizations are within the noise range of fast
test boxes. Maybe someone could give this a try on a smaller device (UP
should benefit from this patch as well!).

Jan

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.2: optimize-head-status.patch --]
[-- Type: text/x-patch; name="optimize-head-status.patch", Size: 7913 bytes --]

---
 include/linux/ipipe.h        |   13 +++++--------
 include/linux/ipipe_percpu.h |    8 +++++++-
 kernel/ipipe/core.c          |   42 ++++++++++++++++++++++--------------------
 3 files changed, 34 insertions(+), 29 deletions(-)

Index: b/include/linux/ipipe.h
===================================================================
--- a/include/linux/ipipe.h
+++ b/include/linux/ipipe.h
@@ -361,31 +361,29 @@ static inline unsigned long ipipe_test_p
 static inline void ipipe_stall_pipeline_head(void)
 {
 	local_irq_disable_hw();
-	__set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(__ipipe_pipeline_head(), status));
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
 }
 
 static inline unsigned long ipipe_test_and_stall_pipeline_head(void)
 {
 	local_irq_disable_hw();
-	return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(__ipipe_pipeline_head(), status));
+	return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
 }
 
 void ipipe_unstall_pipeline_head(void);
 
-void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head_domain,
-					    unsigned long x);
+void fastcall __ipipe_restore_pipeline_head(unsigned long x);
 
 static inline void ipipe_restore_pipeline_head(unsigned long x)
 {
-	struct ipipe_domain *head_domain = __ipipe_pipeline_head();
 	/* On some archs, __test_and_set_bit() might return different
 	 * truth value than test_bit(), so we test the exclusive OR of
 	 * both statuses, assuming that the lowest bit is always set in
 	 * the truth value (if this is wrong, the failed optimization will
 	 * be caught in __ipipe_restore_pipeline_head() if
 	 * CONFIG_DEBUG_KERNEL is set). */
-	if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status))) & 1)
-		__ipipe_restore_pipeline_head(head_domain, x);
+	if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status))) & 1)
+		__ipipe_restore_pipeline_head(x);
 }
 
 #define ipipe_unstall_pipeline() \
@@ -477,7 +475,6 @@ int ipipe_disable_ondemand_mappings(stru
 #define local_irq_disable_hw_cond()		local_irq_disable_hw()
 #define local_irq_save_hw_cond(flags)		local_irq_save_hw(flags)
 #define local_irq_restore_hw_cond(flags)	local_irq_restore_hw(flags)
-#define local_irq_disable_head()		ipipe_stall_pipeline_head()
 
 #define local_irq_enable_nohead(ipd)			\
 	do {						\
Index: b/include/linux/ipipe_percpu.h
===================================================================
--- a/include/linux/ipipe_percpu.h
+++ b/include/linux/ipipe_percpu.h
@@ -49,6 +49,9 @@ DECLARE_PER_CPU(struct ipipe_percpu_doma
 	(__raw_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot]->var)
 #endif
 
+#define IPIPE_ROOT_SLOT			0
+#define IPIPE_HEAD_SLOT			(CONFIG_IPIPE_DOMAINS - 1)
+
 DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]);
 
 DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain);
@@ -61,9 +64,12 @@ DECLARE_PER_CPU(int, ipipe_percpu_contex
 #define ipipe_cpu_var(var)		__raw_get_cpu_var(var)
 
 #define ipipe_root_cpudom_var(var)	\
-	__raw_get_cpu_var(ipipe_percpu_darray)[0].var
+	__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT].var
 
 #define ipipe_this_cpudom_var(var)	\
 	ipipe_cpudom_var(ipipe_current_domain, var)
 
+#define ipipe_head_cpudom_var(var)	\
+	__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_HEAD_SLOT].var
+
 #endif	/* !__LINUX_IPIPE_PERCPU_H */
Index: b/kernel/ipipe/core.c
===================================================================
--- a/kernel/ipipe/core.c
+++ b/kernel/ipipe/core.c
@@ -65,17 +65,18 @@ EXPORT_SYMBOL(__ipipe_root_status);
  * Work around a GCC 3.x issue making alias symbols unusable as
  * constant initializers.
  */
-unsigned long *const __ipipe_root_status_addr = &__raw_get_cpu_var(ipipe_percpu_darray)[0].status;
+unsigned long *const __ipipe_root_status_addr =
+	&__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT].status;
 EXPORT_SYMBOL(__ipipe_root_status_addr);
 #endif /* __GNUC__ < 4 */
 
 DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) =
-{ [0] = (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_percpu_darray) };
+{ [IPIPE_ROOT_SLOT] = (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_percpu_darray) };
 EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr);
 #endif /* !CONFIG_SMP */
 
 DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) =
-{ [0] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. */
+{ [IPIPE_ROOT_SLOT] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. */
 
 DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root };
 
@@ -395,14 +396,12 @@ void fastcall ipipe_restore_pipeline_fro
 
 void ipipe_unstall_pipeline_head(void)
 {
-	struct ipipe_domain *head_domain;
-
 	local_irq_disable_hw();
 
-	head_domain = __ipipe_pipeline_head();
-	__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
 
-	if (unlikely(ipipe_cpudom_var(head_domain, irqpend_himask) != 0)) {
+	if (unlikely(ipipe_head_cpudom_var(irqpend_himask) != 0)) {
+		struct ipipe_domain *head_domain = __ipipe_pipeline_head();
 		if (likely(head_domain == ipipe_current_domain))
 			__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
 		else
@@ -412,14 +411,14 @@ void ipipe_unstall_pipeline_head(void)
 	local_irq_enable_hw();
 }
 
-void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head_domain, unsigned long x)
+void fastcall __ipipe_restore_pipeline_head(unsigned long x)
 {
 	local_irq_disable_hw();
 
 	if (x) {
 #ifdef CONFIG_DEBUG_KERNEL
 		static int warned;
-		if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status))) {
+		if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status))) {
 			/*
 			 * Already stalled albeit ipipe_restore_pipeline_head()
 			 * should have detected it? Send a warning once.
@@ -430,12 +429,13 @@ void fastcall __ipipe_restore_pipeline_h
 			dump_stack();
 		}
 #else /* !CONFIG_DEBUG_KERNEL */
-		set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+		set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
 #endif /* CONFIG_DEBUG_KERNEL */
 	}
 	else {
-		__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
-		if (unlikely(ipipe_cpudom_var(head_domain, irqpend_himask) != 0)) {
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
+		if (unlikely(ipipe_head_cpudom_var(irqpend_himask) != 0)) {
+			struct ipipe_domain *head_domain = __ipipe_pipeline_head();
 			if (likely(head_domain == ipipe_current_domain))
 				__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
 			else
@@ -1011,7 +1011,7 @@ int ipipe_register_domain(struct ipipe_d
 			  struct ipipe_domain_attr *attr)
 {
 	struct ipipe_domain *_ipd;
-	struct list_head *pos;
+	struct list_head *pos = NULL;
 	unsigned long flags;
 
 	if (!ipipe_root_domain_p) {
@@ -1020,14 +1020,16 @@ int ipipe_register_domain(struct ipipe_d
 		return -EPERM;
 	}
 
-	if (attr->priority == IPIPE_HEAD_PRIORITY &&
-	    test_bit(IPIPE_AHEAD_FLAG,&__ipipe_pipeline_head()->flags))
-		return -EAGAIN;	/* Cannot override current head. */
-
 	flags = ipipe_critical_enter(NULL);
 
-	pos = NULL;
-	ipd->slot = ffz(__ipipe_domain_slot_map);
+	if (attr->priority == IPIPE_HEAD_PRIORITY) {
+		if (test_bit(IPIPE_HEAD_SLOT, &__ipipe_domain_slot_map)) {
+			ipipe_critical_exit(flags);
+			return -EAGAIN;	/* Cannot override current head. */
+		}
+		ipd->slot = IPIPE_HEAD_SLOT;
+	} else
+		ipd->slot = ffz(__ipipe_domain_slot_map);
 
 	if (ipd->slot < CONFIG_IPIPE_DOMAINS) {
 		set_bit(ipd->slot, &__ipipe_domain_slot_map);

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 254 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [Adeos-main] [RFC][PATCH] Reduce pipeline_head access overhead
  2008-02-27  8:29 [Adeos-main] [RFC][PATCH] Reduce pipeline_head access overhead Jan Kiszka
@ 2008-03-25  8:17 ` Philippe Gerum
  0 siblings, 0 replies; 2+ messages in thread
From: Philippe Gerum @ 2008-03-25  8:17 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: adeos-main

Jan Kiszka wrote:
> This patch is in fact part of my xnlock rework (updates on that one will
> follow the next days). I noticed that the complexity of
> ipipe_cpudom_var(head_domain, *), normally inlined, is fairly relevant
> for the text size of Xenomai and may thus impact the WCET as well.
> 
> So I applied the same pattern as already used for ipipe_root_cpudom_var
> also to the head domain, ie. assigned a fixed slot, the topmost, in
> ipipe_percpu_daddr/darray to the head domain. This gives a nice text
> size reduction as we no longer need to go via
> __ipipe_pipeline.next->slot (2 memory accesses, even 3 on x867/SMP +
> some arithmetics) to look up the address of per-cpu domain variables
> like "status".
> 
> The drawback is that this introduces an inconsistency to the
> ipipe_*_pipeline_head API: It no longer works for head == root. While
> this is most probably irrelevant in practice, it remains ugly. I already
> thought about renaming the modified services (and deprecating the
> original ones), but for now I would like to focus the discussion on the
> optimized services.
> 
> The patch already survived quite some testing on various real and
> virtual boxes with Xenomai. Latency reduction is hard to estimate as
> such comparably small optimizations are within the noise range of fast
> test boxes. Maybe someone could give this a try on a smaller device (UP
> should benefit from this patch as well!).
>

The root domain cannot be an invariant pipeline head, by design, so I don't see
any showstopper regarding this approach. This looks good.

> Jan
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> Adeos-main mailing list
> Adeos-main@domain.hid
> https://mail.gna.org/listinfo/adeos-main


-- 
Philippe.


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2008-03-25  8:17 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-02-27  8:29 [Adeos-main] [RFC][PATCH] Reduce pipeline_head access overhead Jan Kiszka
2008-03-25  8:17 ` Philippe Gerum

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.