From: "Gary Funck" <gary@intrepid.com>
To: "Linux-Kernel@Vger. Kernel. Org" <linux-kernel@vger.kernel.org>
Subject: RE: 2.6.17-1.2145_FC5 mmap-related soft lockup
Date: Sat, 15 Jul 2006 23:50:26 -0700 [thread overview]
Message-ID: <JCEPIPKHCJGDMPOHDOIGKEKMDFAA.gary@intrepid.com> (raw)
In-Reply-To: <JCEPIPKHCJGDMPOHDOIGGEKJDFAA.gary@intrepid.com>
[-- Attachment #1: Type: text/plain, Size: 1724 bytes --]
> From: Gary Funck
> Sent: Saturday, July 15, 2006 10:07 AM
>
> A test program which allocates about 256M of MAP_ANONYMOUS mmap memory,
> and then spawns 4 processess, where each process i writes to 1/4 of the
> mapped memory, and then reads the memory written by
> the process (i + 1)%4, triggers a soft lockup, when exiting.
> Hardware:
> dual core dual Opteron 275 (Tyan motherboard, 4G physical memory)
> has been rock solid reliable.
>
> BUG: soft lockup detected on CPU#3!
Follow up, the attached test program, when compiled on FC5 with the
2.6.17-1.2145 kernel will cause lost timer ticks, lost RS-232 interrupts,
and often will lead to the soft lockup situation shown below:
BUG: soft lockup detected on CPU#0!
Call Trace: <IRQ> <ffffffff802b2fb5>{softlockup_tick+219}
<ffffffff8029708e>{update_process_times+66}
<ffffffff8027a3ed>{smp_local_timer_interrupt+35}
<ffffffff8027aa95>{smp_apic_timer_interrupt+65}
<ffffffff80263acb>{apic_timer_interrupt+135} <EOI>
<ffffffff8020e578>{__set_page_dirty_nobuffers+0}
<ffffffff8020a7ab>{release_pages+111}
<ffffffff80267b76>{thread_return+0}
<ffffffff80267bd4>{thread_return+94}
<ffffffff8020e578>{__set_page_dirty_nobuffers+0}
<ffffffff8020e578>{__set_page_dirty_nobuffers+0}
<ffffffff8020e09e>{free_pages_and_swap_cache+115}
<ffffffff80207b62>{unmap_vmas+1145}
<ffffffff8023c7d9>{exit_mmap+120} <ffffffff8023eda8>{mmput+44}
<ffffffff80215ece>{do_exit+599}
<ffffffff8024cacd>{debug_mutex_init+0}
<ffffffff80262f01>{tracesys+209}
After compiling the code, A continuous loop like the following, seems to
eventually lead to the soft lockup situation (FC5, x86_64) shown above:
while true; do
a.out
done
[-- Attachment #2: mmap_soft_lockup_x86_64.c --]
[-- Type: application/octet-stream, Size: 3862 bytes --]
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#define PER_PROCESS_ALLOC (256*1024*1024)
#define NUM_SYNC_WORDS (256/32)
typedef struct shared_data_s
{
int sync[NUM_SYNC_WORDS];
} shared_data_t;
typedef shared_data_t *shared_data_p;
shared_data_p shared_data;
int num_cpus;
void *map;
int n;
#if __x86_64__
#define LOCK_PREFIX "lock ; "
int
atomic_cas (int *ptr, int old, int new)
{
int prev;
__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
: "=a"(prev)
: "q"(new), "m"(*ptr), "0"(old)
: "memory");
return prev == old;
}
#else
# error this test must be run on an x86_64 cpu
#endif
int
get_bit (int *bits, int bitnum)
{
int *word = bits + (bitnum / 32);
int bit = (1 << (bitnum % 32));
return (*word & bit) != 0;
}
void
atomic_set_bit (int *bits, int bitnum)
{
int *word = bits + (bitnum / 32);
int bit = (1 << (bitnum % 32));
int old_val, new_val;
do
{
old_val = *word;
new_val = old_val | bit;
}
while (!atomic_cas (word, old_val, new_val));
}
void
barrier ()
{
int *sync = shared_data->sync;
atomic_set_bit (sync, n); /* set my bit */
if (n == 0)
{
int i, cnt;
/* spin until all bits are set */
for (cnt = 0; cnt != num_cpus;)
for (i = 0, cnt = 0; i < num_cpus; ++i)
if (get_bit(sync, i)) ++cnt;
/* all set, open the barrier. */
for (i = 0; i < (num_cpus + 31)/32; ++i) sync[i] = 0;
}
else
while (get_bit (sync, n)) /* spin on my bit set */;
}
void
run_test ()
{
int nxt = (n + 1) % num_cpus;
char *s;
char *cp;
int i;
int c;
char *buf;
barrier ();
/* write the data for the next process */
s = map + nxt * PER_PROCESS_ALLOC;
c = 'A' + nxt % 26;
memset (s, c, PER_PROCESS_ALLOC-1);
s[PER_PROCESS_ALLOC-1] = '\0';
barrier ();
/* read our data */
s = map + n * PER_PROCESS_ALLOC;
c = 'A' + n % 26;
buf = malloc (PER_PROCESS_ALLOC);
if (!buf)
{ perror ("malloc"); abort (); }
memset (buf, c, PER_PROCESS_ALLOC-1);
buf[PER_PROCESS_ALLOC-1] = '\0';
if (strcmp(s, buf))
{ fprintf (stderr, "%d: data mismatch\n", n); abort (); }
barrier ();
exit (0);
}
int
main (int argc, char *argv[])
{
char *pgm = argv[0];
int mask;
int fd;
int pid;
off_t alloc_size;
int wait_status;
num_cpus = (int)sysconf(_SC_NPROCESSORS_ONLN);
if (num_cpus <= 0)
{ perror ("sysconf"); abort (); }
for (mask = 1; (num_cpus & ~mask); mask <<= 1)
num_cpus = (num_cpus & ~mask);
shared_data = mmap((void *)0, 64*1024*1024, PROT_READ|PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, 0, (off_t)0);
if (shared_data == MAP_FAILED)
{ perror ("mmap"); abort (); }
memset (shared_data, '\0', sizeof (shared_data_t));
alloc_size = num_cpus * PER_PROCESS_ALLOC;
map = mmap((void *)0, alloc_size, PROT_READ|PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, 0, (off_t)0);
if (map == MAP_FAILED)
{ perror ("mmap"); abort (); }
for (n = 0; n < num_cpus; ++n)
{
pid = fork ();
if (pid == 0)
run_test (n); /* no return */
else if (pid < 0)
{ perror ("fork"); abort (); }
}
while ((pid = wait (&wait_status)) > 0)
{
if (WIFEXITED (wait_status))
{
int child_exit = WEXITSTATUS (wait_status);
if (child_exit)
{ fprintf (stderr, "non-zero child exit status\n"); abort ();}
}
else if (WIFSIGNALED (wait_status))
{
int child_sig = WTERMSIG (wait_status);
fprintf (stderr, "child caught signal\n");
abort ();
}
}
exit (0);
}
prev parent reply other threads:[~2006-07-16 6:50 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-07-15 17:07 2.6.17-1.2145_FC5 mmap-related soft lockup Gary Funck
2006-07-16 5:19 ` Andrew Morton
2006-07-19 2:25 ` Gary Funck
2006-07-19 4:04 ` Andrew Morton
2006-07-16 6:50 ` Gary Funck [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=JCEPIPKHCJGDMPOHDOIGKEKMDFAA.gary@intrepid.com \
--to=gary@intrepid.com \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox