From: Jack Steiner <steiner@sgi.com>
To: linux-ia64@vger.kernel.org
Subject: [Linux-ia64] 2.4.5 hangs in smp_call_function.
Date: Thu, 07 Jun 2001 22:00:11 +0000 [thread overview]
Message-ID: <marc-linux-ia64-105590693005699@msgid-missing> (raw)
Since upgrading to 2.4.5, we have seen several system hangs
where multiple cpus were spinning in smp_call_function.
The problem appears to be caused by the code in smp_call_function()
that resends an IPI if a timeout expires.
Resending an IPI_CALL_FUNC IPI can cause a cpu to process the "call_func"
request twice and corrupt the "data.finished" count by incrementing the
count twice for one request.
Here is a patch that corrects the problem. I'm not sure what the
correct timeout should be - I increased it from HZ to 400000UL,
but more investigation needs to be done to determine the optimum
value. Since the "resendIPI" code is not needed with C0-stepping
cpus, I didn't worry too much about the timeout value.
I haven't seen any more hangs after applying the patch.
----------------------------------------------------------------------------------
diff -Naur linux_base/arch/ia64/kernel/smp.c linux/arch/ia64/kernel/smp.c
--- linux_base/arch/ia64/kernel/smp.c Thu Jun 7 14:44:07 2001
+++ linux/arch/ia64/kernel/smp.c Thu Jun 7 14:46:05 2001
@@ -244,6 +244,28 @@
send_IPI_single(smp_processor_id(), op);
}
+#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) || defined(CONFIG_ITANIUM_PTCG))
+void
+resend_lost_IPI (void)
+{
+ /*
+ * Really need a null IPI but since this rarely should happen & since this code
+ * will go away, lets not add one.
+ */
+ send_IPI_allbutself(IPI_RESCHEDULE);
+}
+
+void
+resend_lost_IPI_single (int cpu)
+{
+ /*
+ * Really need a null IPI but since this rarely should happen & since this code
+ * will go away, lets not add one.
+ */
+ send_IPI_single(cpu, IPI_RESCHEDULE);
+}
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC || CONFIG_ITANIUM_BSTEP_SPECIFIC || CONFIG_ITANIUM_PTCG */
+
void
smp_send_reschedule (int cpu)
{
@@ -258,16 +280,6 @@
send_IPI_allbutself(IPI_FLUSH_TLB);
}
-void
-smp_resend_flush_tlb (void)
-{
- /*
- * Really need a null IPI but since this rarely should happen & since this code
- * will go away, lets not add one.
- */
- send_IPI_allbutself(IPI_RESCHEDULE);
-}
-
#endif /* !CONFIG_ITANIUM_PTCG */
void
@@ -314,16 +326,18 @@
spin_lock_bh(&call_lock);
call_data = &data;
- resend:
send_IPI_single(cpuid, IPI_CALL_FUNC);
#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC))
/* Wait for response */
- timeout = jiffies + HZ;
+ again:
+ timeout = jiffies + 400000UL;
while ((atomic_read(&data.started) != cpus) && time_before(jiffies, timeout))
barrier();
- if (atomic_read(&data.started) != cpus)
- goto resend;
+ if (atomic_read(&data.started) != cpus) {
+ resend_lost_IPI_single(cpuid);
+ goto again;
+ }
#else
/* Wait for response */
while (atomic_read(&data.started) != cpus)
@@ -379,17 +393,19 @@
spin_lock_bh(&call_lock);
call_data = &data;
- resend:
/* Send a message to all other CPUs and wait for them to respond */
send_IPI_allbutself(IPI_CALL_FUNC);
#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC))
/* Wait for response */
- timeout = jiffies + HZ;
+ again:
+ timeout = jiffies + 400000UL;
while ((atomic_read(&data.started) != cpus) && time_before(jiffies, timeout))
barrier();
- if (atomic_read(&data.started) != cpus)
- goto resend;
+ if (atomic_read(&data.started) != cpus) {
+ resend_lost_IPI();
+ goto again;
+ }
#else
/* Wait for response */
while (atomic_read(&data.started) != cpus)
diff -Naur linux_base/arch/ia64/mm/tlb.c linux/arch/ia64/mm/tlb.c
--- linux_base/arch/ia64/mm/tlb.c Thu Jun 7 14:44:07 2001
+++ linux/arch/ia64/mm/tlb.c Thu Jun 7 14:46:07 2001
@@ -99,12 +99,12 @@
*/
#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)
{
- extern void smp_resend_flush_tlb (void);
+ extern void smp_resend_lost_IPI (void);
unsigned long start = ia64_get_itc();
while (atomic_read(&flush_cpu_count) > 0) {
if ((ia64_get_itc() - start) > 400000UL) {
- smp_resend_flush_tlb();
+ smp_resend_lost_IPI();
start = ia64_get_itc();
}
}
--
Thanks
Jack Steiner (651-683-5302) (vnet 233-5302) steiner@sgi.com
next reply other threads:[~2001-06-07 22:00 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2001-06-07 22:00 Jack Steiner [this message]
2001-06-08 18:49 ` [Linux-ia64] 2.4.5 hangs in smp_call_function Seth, Rohit
2001-06-08 20:03 ` David Mosberger
2001-06-12 6:37 ` root
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=marc-linux-ia64-105590693005699@msgid-missing \
--to=steiner@sgi.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.