/*
 * Thread-creation bug reproducer.
 *
 * Author: jimrees@itasoftware.com
 *
 * Starting with nearly any 2.4 kernel, on SMP platforms (both Intel
 * and AMD), it appears that newly created threads sometimes get an
 * inconsistent view of memory. This program is intended to reproduce
 * the problem, but it can sometimes take a while. I have seen
 * reproducible failures on 2.4.2, 2.4.5, 2.4.8, 2.4.9, and 2.4.14.
 *
 * main() starts off by creating 'M' leaf-creator threads, which
 * rendezvous before starting to fire off children. Then each
 * leaf-creator fires off 'N' threads, and gives each a pointer to a
 * plain data structure where the fourth element must be the sum of
 * the first three, and the first three are randomly generated. The
 * subthread then 'validates' the structure, complains if the
 * condition fails, and exits.
 *
 * Typical behavior includes segfaults, complaint of failure, and
 * complaint of failure followed immediately by segfault. I would
 * guess that if a failure can be detected, then there's also a good
 * chance the thread that detected the failure has an inconsistent
 * view of its own stack -- hence the segfaults. Note that I have never
 * seen any other running program affected by this behavior, nor have
 * I seen any kernel panics/oops or system hangs. The multi-threaded
 * program is the only program affected.
 *
 * With default arguments, the program usually crashes within a few
 * seconds, though some cases take longer -- 4 minutes with no crashes
 * is probably sufficient to say the bug is fixed.
 *
 * Usually, I get the crashes quickly if nothing else on the system is
 * consuming significant CPU time. Though, I have seen crashes happen
 * during kernel builds too.
 *
 * The random sleep variables used are admittedly very crude -- I know
 * that the best sleep precision available is 10ms, so most of the
 * sleeps below are simply until the next kernel tick -- but the
 * reproducer still works fine.
* * To build the reproducer: * * g++ -O threadjam.cpp -lpthread */ #include #include // atoi #include // usleep,sleep #include // cout,cerr #include // ifstream #include // strcasecmp /* * Parameters: * * N -- the split, default to 5 * -- This is the number of leaf-threads created by each primary thread * per loop pass. * M -- # leaf-thread creator threads, defaults to 4 * * SMIN -- min sleep us, defaults to 1000 micro-seconds (1 milli) * SMAX -- max sleep us, defaults to 2000 micro-seconds (2 milli) * * Yes, sleeps on Linux are constrained to multiples of 10 milliseconds (plus the * time until the *next* 10ms kernel tick). */ static int N = 5; static int M = 4; static int SMIN = 1000; static int SMAX = 2000; const struct argProc_t { const char* argname; int* argptr; } argProc[] = { { "N", &N }, { "M", &M }, { "SMIN", &SMIN }, { "SMAX", &SMAX }, { NULL, NULL } }; static void parse_args(int argc, char **argv) { while(argc > 1 && argv[0][0] == '-') { const argProc_t* a = argProc; bool match_found = false; while(a->argname) { if (strcasecmp(&argv[0][1], a->argname) == 0) { *(a->argptr) = atoi(argv[1]); cout << a->argname << " = " << *(a->argptr) << endl; argv += 2; argc -= 2; match_found = true; break; } else ++a; } if (!match_found) { clog << "No match for " << argv[0] << endl; exit(1); } } } /* * The Rendezvous class is a way for multiple threads to 'meet up' at * a particular point before proceeding. All threads arriving are * blocked until the *final* thread arrives, then all threads are * released. */ class Rendezvous { pthread_mutex_t m_lock; pthread_cond_t m_condition; int m_waiting_for; // # threads waiting for still public: explicit Rendezvous(int mthreads=0) : m_waiting_for(mthreads) { pthread_mutex_init(&m_lock, NULL); pthread_cond_init(&m_condition, NULL); } void setThreads(int n) { m_waiting_for = n; } // Join the rendezvous... void join() { pthread_mutex_lock(&m_lock); if (--m_waiting_for == 0) { // Ah, I was the last one, broadcast, and return... 
pthread_cond_broadcast(&m_condition); pthread_mutex_unlock(&m_lock); return; } // Otherwise, wait until notified... do { pthread_cond_wait(&m_condition, &m_lock); } while(m_waiting_for > 0); pthread_mutex_unlock(&m_lock); } }; //////////////////////////////////////////////////////////////// // A quick 'n dirty counter class with memory locked increment // support. Not essential to demonstration of bug. class AtomicCounter { volatile unsigned int m_count; public: AtomicCounter(unsigned int _initial_value=0) throw() : m_count(_initial_value) {} const unsigned int operator++() throw() { unsigned int prior_value; asm (".byte 0xf0, 0x0f, 0xc1, 0x02" // lock; xaddl %eax, (%edx) : "=a" (prior_value) : "0" (+1), "m" (m_count), "d" (&m_count) : "memory"); return (prior_value + 1); } const unsigned int get() const throw() { return m_count; } }; static AtomicCounter total; static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER; static ifstream random_input("/dev/urandom"); static int local_random() { int n; char* ptr = reinterpret_cast(&n); pthread_mutex_lock(&global_lock); { random_input.read(ptr, sizeof(n)); } pthread_mutex_unlock(&global_lock); return n; } static double local_random_f() { unsigned long r = local_random(); return double(r) / double(0xffffffff); } /* * start_up_data is a structure 'passed' to the child * thread as part of its creation. The child is responsible * for delete'ing this. */ struct start_up_data { int a; int b; int c; int d; /* * setup() called by parent immediately prior * to child thread creation. */ void setup() { d = a + b + c; } /* * validate() invoked by child thread upon startup. 
*/ bool validate() const { return ((a + b + c) == d); } }; static const char* const failure_message = "Failure detected\n"; void* run(void* data) { start_up_data* v = static_cast(data); if (!v->validate()) cerr << failure_message; delete v; ++total; // detach myself pthread_detach(pthread_self()); return NULL; } static Rendezvous rendezvous; void* forker(void* data) { const int SLEEP_US = int(double(SMIN) + double(SMAX-SMIN) * local_random_f()); pthread_mutex_lock(&global_lock); cout << SLEEP_US << '\n'; pthread_mutex_unlock(&global_lock); start_up_data* dats[N]; rendezvous.join(); while(true) { // Partially setup each child's structure for(int i=0;ia = local_random(); dats[i]->b = local_random(); dats[i]->c = local_random(); } pthread_t tid; for(int i=0;isetup(); pthread_create(&tid, NULL, run, dats[i]); } usleep(SLEEP_US); } return NULL; } int main(int argc, char **argv) { parse_args(argc-1, argv+1); // Start M threads, each of which forks 5 threads // at a time, and sleeps such that roughly 5-ways // splits occur per second. pthread_t tid; rendezvous.setThreads(M+1); for(int i=0;i