public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] multithreaded coredumps for elf exeecutables for O(1) scheduler
@ 2002-05-10 13:24 Mark Gross
  2002-05-10 19:06 ` Daniel Jacobowitz
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Gross @ 2002-05-10 13:24 UTC (permalink / raw)
  To: linux-kernel; +Cc: mark.gross, mark, vamsi, efocht

[-- Attachment #1: Type: text/plain, Size: 1474 bytes --]


Attached is my current patch for creating multithreaded core dump files,
that works with the O(1) scheduler.
 
This is a continuation of the work posted by Vamsi Krishna back on 3/21/02.
I'm sorry for the delay.  The problem of suspending the other thread
processes for the duration of the core dump was a challenging problem with
the O(1) scheduler.
 
Most of the patch is the same as that posted on 3/21/02 with some minor
fixes and the rebasing to the 2.5.14 kernel.  The interesting bits are in
the additions to sched.c to pause and resume the thread processes under the 
O(1) scheduler.
 
Here I'm leveraging the work of Eric Foct for the process migration, to
temporarily migrate the thread processes I need suspended to a "phantom
runqueue".  This is just an additional run queue that has no cpu.  When I'm
finished with the core dump I migrate them off the phantom run queue and
continue processing whatever exit processing they do.
 
I tried a number of approaches to process pausing that didn't quite work 
before I settled on the attached implementation.
 
This work has been unit tested on 2-way and 4-way SMP systems with no
lockups so far.  YMMV.
 
Note: GDB 5.x will work with the core files created with this patch, provided
the libpthread that gets loaded at gdb debug time is stripped of symbols. 

Run strip on your libpthread so files and things should work fine for you.  
 
--mgross
 
(W) 503-712-8218
MS: JF1-05
2111 N.E. 25th Ave.
Hillsboro, OR 97124

[-- Attachment #2: tcore-2.5.14.pat --]
[-- Type: text/x-diff, Size: 25242 bytes --]

diff -urN -X dontdiff linux-2.5.14.vannilla/arch/i386/kernel/i387.c linux2.5.14.tcore/arch/i386/kernel/i387.c
--- linux-2.5.14.vannilla/arch/i386/kernel/i387.c	Sun May  5 23:38:06 2002
+++ linux2.5.14.tcore/arch/i386/kernel/i387.c	Tue May  7 14:59:10 2002
@@ -528,3 +528,36 @@
 
 	return fpvalid;
 }
+
+int dump_task_fpu( struct task_struct *tsk, struct user_i387_struct *fpu )
+{
+	int fpvalid;
+
+	fpvalid = tsk->used_math;
+	if ( fpvalid ) {
+		if (tsk == current) unlazy_fpu( tsk );
+		if ( cpu_has_fxsr ) {
+			copy_fpu_fxsave( tsk, fpu );
+		} else {
+			copy_fpu_fsave( tsk, fpu );
+		}
+	}
+
+	return fpvalid;
+}
+
+int dump_task_extended_fpu( struct task_struct *tsk, struct user_fxsr_struct *fpu )
+{
+	int fpvalid;
+	
+	fpvalid = tsk->used_math && cpu_has_fxsr;
+	if ( fpvalid ) {
+		if (tsk == current) unlazy_fpu( tsk );
+		memcpy( fpu, &tsk->thread.i387.fxsave,
+		sizeof(struct user_fxsr_struct) );
+	}
+	
+	return fpvalid;
+}
+
+
diff -urN -X dontdiff linux-2.5.14.vannilla/arch/i386/kernel/process.c linux2.5.14.tcore/arch/i386/kernel/process.c
--- linux-2.5.14.vannilla/arch/i386/kernel/process.c	Sun May  5 23:37:52 2002
+++ linux2.5.14.tcore/arch/i386/kernel/process.c	Wed May  8 13:39:19 2002
@@ -610,6 +610,18 @@
 
 	dump->u_fpvalid = dump_fpu (regs, &dump->i387);
 }
+/* 
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, struct pt_regs *regs)
+{
+	*regs = *(struct pt_regs *)((unsigned long)tsk->thread_info + THREAD_SIZE - sizeof(struct pt_regs));
+	regs->xcs &= 0xffff;
+	regs->xds &= 0xffff;
+	regs->xes &= 0xffff;
+	regs->xss &= 0xffff;
+	return 1;
+}
 
 /*
  * This special macro can be used to load a debugging register
Binary files linux-2.5.14.vannilla/core and linux2.5.14.tcore/core differ
diff -urN -X dontdiff linux-2.5.14.vannilla/fs/binfmt_elf.c linux2.5.14.tcore/fs/binfmt_elf.c
--- linux-2.5.14.vannilla/fs/binfmt_elf.c	Sun May  5 23:38:01 2002
+++ linux2.5.14.tcore/fs/binfmt_elf.c	Wed May  8 12:22:11 2002
@@ -13,6 +13,7 @@
 
 #include <linux/fs.h>
 #include <linux/stat.h>
+#include <linux/sched.h>
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -30,6 +31,7 @@
 #include <linux/elfcore.h>
 #include <linux/init.h>
 #include <linux/highuid.h>
+#include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/compiler.h>
 #include <linux/highmem.h>
@@ -958,7 +960,7 @@
 /* #define DEBUG */
 
 #ifdef DEBUG
-static void dump_regs(const char *str, elf_greg_t *r)
+static void dump_regs(const char *str, elf_gregset_t *r)
 {
 	int i;
 	static const char *regs[] = { "ebx", "ecx", "edx", "esi", "edi", "ebp",
@@ -1006,6 +1008,163 @@
 #define DUMP_SEEK(off)	\
 	if (!dump_seek(file, (off))) \
 		goto end_coredump;
+
+static inline void fill_elf_header(struct elfhdr *elf, int segs)
+{
+	memcpy(elf->e_ident, ELFMAG, SELFMAG);
+	elf->e_ident[EI_CLASS] = ELF_CLASS;
+	elf->e_ident[EI_DATA] = ELF_DATA;
+	elf->e_ident[EI_VERSION] = EV_CURRENT;
+	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+
+	elf->e_type = ET_CORE;
+	elf->e_machine = ELF_ARCH;
+	elf->e_version = EV_CURRENT;
+	elf->e_entry = 0;
+	elf->e_phoff = sizeof(struct elfhdr);
+	elf->e_shoff = 0;
+	elf->e_flags = 0;
+	elf->e_ehsize = sizeof(struct elfhdr);
+	elf->e_phentsize = sizeof(struct elf_phdr);
+	elf->e_phnum = segs;
+	elf->e_shentsize = 0;
+	elf->e_shnum = 0;
+	elf->e_shstrndx = 0;
+	return;
+}
+
+static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
+{
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = offset;
+	phdr->p_vaddr = 0;
+	phdr->p_paddr = 0;
+	phdr->p_filesz = sz;
+	phdr->p_memsz = 0;
+	phdr->p_flags = 0;
+	phdr->p_align = 0;
+	return;
+}
+
+static inline void fill_note(struct memelfnote *note, const char *name, int type, 
+		unsigned int sz, void *data)
+{
+	note->name = name;
+	note->type = type;
+	note->datasz = sz;
+	note->data = data;
+	return;
+}
+
+/*
+ * fill up all the fields in prstatus from the given task struct, except registers
+ * which need to be filled up seperately.
+ */
+static inline void fill_prstatus(struct elf_prstatus *prstatus, struct task_struct *p, long signr) 
+{
+	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
+	prstatus->pr_sigpend = p->pending.signal.sig[0];
+	prstatus->pr_sighold = p->blocked.sig[0];
+	prstatus->pr_pid = p->pid;
+	prstatus->pr_ppid = p->parent->pid;
+	prstatus->pr_pgrp = p->pgrp;
+	prstatus->pr_sid = p->session;
+	prstatus->pr_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
+	prstatus->pr_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
+	prstatus->pr_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
+	prstatus->pr_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
+	prstatus->pr_cutime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
+	prstatus->pr_cutime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
+	prstatus->pr_cstime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
+	prstatus->pr_cstime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
+	return;
+}
+
+static inline void fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p)
+{
+	int i;
+	
+	psinfo->pr_pid = p->pid;
+	psinfo->pr_ppid = p->parent->pid;
+	psinfo->pr_pgrp = p->pgrp;
+	psinfo->pr_sid = p->session;
+
+	i = p->state ? ffz(~p->state) + 1 : 0;
+	psinfo->pr_state = i;
+	psinfo->pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
+	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
+	psinfo->pr_nice =  task_nice(p);
+	psinfo->pr_flag = p->flags;
+	psinfo->pr_uid = NEW_TO_OLD_UID(p->uid);
+	psinfo->pr_gid = NEW_TO_OLD_GID(p->gid);
+	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+	return;
+}
+
+/*
+ * This is the variable that can be set in proc to determine if we want to
+ * dump a multithreaded core or not. A value of 1 means yes while any
+ * other value means no.
+ *
+ * It is located at /proc/sys/kernel/core_dumps_threads
+ */
+extern int core_dumps_threads;
+
+/* Here is the structure in which status of each thread is captured. */
+struct elf_thread_status
+{
+	struct list_head list;
+	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
+	elf_fpregset_t fpu;		/* NT_PRFPREG */
+	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
+	struct memelfnote notes[3];
+	int num_notes;
+};
+
+/*
+ * In order to add the specific thread information for the elf file format,
+ * we need to keep a linked list of every threads pr_status and then
+ * create a single section for them in the final core file.
+ */
+static int elf_dump_thread_status(long signr, struct task_struct * p, struct list_head * thread_list)
+{
+
+	struct elf_thread_status *t;
+	int sz = 0;
+
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (!t) {
+		printk(KERN_WARNING "Cannot allocate memory for thread status.\n");
+		return 0;
+	}
+
+	INIT_LIST_HEAD(&t->list);
+	t->num_notes = 0;
+
+	fill_prstatus(&t->prstatus, p, signr);
+	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);	
+	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus));
+	t->num_notes++;
+	sz += notesize(&t->notes[0]);
+
+	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, &t->fpu))) {
+		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu));
+		t->num_notes++;
+		sz += notesize(&t->notes[1]);
+	}
+
+	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
+		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &(t->xfpu));
+		t->num_notes++;
+		sz += notesize(&t->notes[2]);
+	}
+	
+	list_add(&t->list, thread_list);
+	return sz;
+}
+
+
+
 /*
  * Actual dumper
  *
@@ -1024,12 +1183,30 @@
 	struct elfhdr elf;
 	off_t offset = 0, dataoff;
 	unsigned long limit = current->rlim[RLIMIT_CORE].rlim_cur;
-	int numnote = 4;
-	struct memelfnote notes[4];
+	int numnote = 5;
+	struct memelfnote notes[5];
 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
-	elf_fpregset_t fpu;		/* NT_PRFPREG */
 	struct elf_prpsinfo psinfo;	/* NT_PRPSINFO */
+ 	struct task_struct *p;
+ 	LIST_HEAD(thread_list);
+ 	struct list_head *t;
+	elf_fpregset_t fpu;
+	elf_fpxregset_t xfpu;
+	int dump_threads = core_dumps_threads; /* this value should not change once the */
+					/* dumping starts */
+	int thread_status_size = 0;
+	
 
+	/* First pause all related threaded processes */
+	if (dump_threads)	{
+		suspend_threads();
+	}
+	
+	/* now stop all vm operations */
+	down_write(&current->mm->mmap_sem);
+	segs = current->mm->map_count;
+
+	
 	/* first copy the parameters from user space */
 	memset(&psinfo, 0, sizeof(psinfo));
 	{
@@ -1063,35 +1240,32 @@
 	else
 		*(struct pt_regs *)&prstatus.pr_reg = *regs;
 #endif
+ 
+	if (dump_threads) {
+		/* capture the status of all other threads */
+		if (signr) {
+			read_lock(&tasklist_lock);
+			for_each_task(p)
+				if (current->mm == p->mm && current != p) {
+					int sz = elf_dump_thread_status(signr, p, &thread_list);
+					if (!sz) {
+						read_unlock(&tasklist_lock);
+						goto cleanup;
+					}
+					else
+						thread_status_size += sz;
+				}
+			read_unlock(&tasklist_lock);
+		}
+	} /* End if(dump_threads) */
 
-	/* now stop all vm operations */
-	down_write(&current->mm->mmap_sem);
-	segs = current->mm->map_count;
 
 #ifdef DEBUG
 	printk("elf_core_dump: %d segs %lu limit\n", segs, limit);
 #endif
 
 	/* Set up header */
-	memcpy(elf.e_ident, ELFMAG, SELFMAG);
-	elf.e_ident[EI_CLASS] = ELF_CLASS;
-	elf.e_ident[EI_DATA] = ELF_DATA;
-	elf.e_ident[EI_VERSION] = EV_CURRENT;
-	memset(elf.e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
-
-	elf.e_type = ET_CORE;
-	elf.e_machine = ELF_ARCH;
-	elf.e_version = EV_CURRENT;
-	elf.e_entry = 0;
-	elf.e_phoff = sizeof(elf);
-	elf.e_shoff = 0;
-	elf.e_flags = 0;
-	elf.e_ehsize = sizeof(elf);
-	elf.e_phentsize = sizeof(struct elf_phdr);
-	elf.e_phnum = segs+1;		/* Include notes */
-	elf.e_shentsize = 0;
-	elf.e_shnum = 0;
-	elf.e_shstrndx = 0;
+	fill_elf_header(&elf, segs+1); /* including notes section*/
 
 	fs = get_fs();
 	set_fs(KERNEL_DS);
@@ -1108,64 +1282,35 @@
 	 * with info from their /proc.
 	 */
 
-	notes[0].name = "CORE";
-	notes[0].type = NT_PRSTATUS;
-	notes[0].datasz = sizeof(prstatus);
-	notes[0].data = &prstatus;
-	prstatus.pr_info.si_signo = prstatus.pr_cursig = signr;
-	prstatus.pr_sigpend = current->pending.signal.sig[0];
-	prstatus.pr_sighold = current->blocked.sig[0];
-	psinfo.pr_pid = prstatus.pr_pid = current->pid;
-	psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
-	psinfo.pr_pgrp = prstatus.pr_pgrp = current->pgrp;
-	psinfo.pr_sid = prstatus.pr_sid = current->session;
-	prstatus.pr_utime.tv_sec = CT_TO_SECS(current->times.tms_utime);
-	prstatus.pr_utime.tv_usec = CT_TO_USECS(current->times.tms_utime);
-	prstatus.pr_stime.tv_sec = CT_TO_SECS(current->times.tms_stime);
-	prstatus.pr_stime.tv_usec = CT_TO_USECS(current->times.tms_stime);
-	prstatus.pr_cutime.tv_sec = CT_TO_SECS(current->times.tms_cutime);
-	prstatus.pr_cutime.tv_usec = CT_TO_USECS(current->times.tms_cutime);
-	prstatus.pr_cstime.tv_sec = CT_TO_SECS(current->times.tms_cstime);
-	prstatus.pr_cstime.tv_usec = CT_TO_USECS(current->times.tms_cstime);
+	
+	fill_prstatus(&prstatus, current, signr);
+	fill_note(&notes[0], "CORE", NT_PRSTATUS, sizeof(prstatus), &prstatus);
+	
+	elf_core_copy_regs(&prstatus.pr_reg, regs);
+ 	
+	fill_psinfo(&psinfo, current);
+	fill_note(&notes[1], "CORE", NT_PRPSINFO, sizeof(psinfo), &psinfo);
+	
+	fill_note(&notes[2], "CORE", NT_TASKSTRUCT, sizeof(*current), current);
+  
+  	/* Try to dump the FPU. */
+	if ((prstatus.pr_fpvalid = elf_core_copy_task_fpregs(current, &fpu))) {
+		fill_note(&notes[3], "CORE", NT_PRFPREG, sizeof(fpu), &fpu);
+	} else {
+		--numnote;
+ 	}
+	if (elf_core_copy_task_xfpregs(current, &xfpu)) {
+		fill_note(&notes[4], "LINUX", NT_PRXFPREG, sizeof(xfpu), &xfpu);
+	} else {
+		--numnote;
+	}
+  	
 
 #ifdef DEBUG
 	dump_regs("Passed in regs", (elf_greg_t *)regs);
 	dump_regs("prstatus regs", (elf_greg_t *)&prstatus.pr_reg);
 #endif
 
-	notes[1].name = "CORE";
-	notes[1].type = NT_PRPSINFO;
-	notes[1].datasz = sizeof(psinfo);
-	notes[1].data = &psinfo;
-	i = current->state ? ffz(~current->state) + 1 : 0;
-	psinfo.pr_state = i;
-	psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
-	psinfo.pr_zomb = psinfo.pr_sname == 'Z';
-	psinfo.pr_nice = task_nice(current);
-	psinfo.pr_flag = current->flags;
-	psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
-	psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
-	strncpy(psinfo.pr_fname, current->comm, sizeof(psinfo.pr_fname));
-
-	notes[2].name = "CORE";
-	notes[2].type = NT_TASKSTRUCT;
-	notes[2].datasz = sizeof(*current);
-	notes[2].data = current;
-
-	/* Try to dump the FPU. */
-	prstatus.pr_fpvalid = dump_fpu (regs, &fpu);
-	if (!prstatus.pr_fpvalid)
-	{
-		numnote--;
-	}
-	else
-	{
-		notes[3].name = "CORE";
-		notes[3].type = NT_PRFPREG;
-		notes[3].datasz = sizeof(fpu);
-		notes[3].data = &fpu;
-	}
-	
 	/* Write notes phdr entry */
 	{
 		struct elf_phdr phdr;
@@ -1173,17 +1318,12 @@
 
 		for(i = 0; i < numnote; i++)
 			sz += notesize(&notes[i]);
+		
+		if (dump_threads)
+			sz += thread_status_size;
 
-		phdr.p_type = PT_NOTE;
-		phdr.p_offset = offset;
-		phdr.p_vaddr = 0;
-		phdr.p_paddr = 0;
-		phdr.p_filesz = sz;
-		phdr.p_memsz = 0;
-		phdr.p_flags = 0;
-		phdr.p_align = 0;
-
-		offset += phdr.p_filesz;
+		fill_elf_note_phdr(&phdr, sz, offset);
+		offset += sz;
 		DUMP_WRITE(&phdr, sizeof(phdr));
 	}
 
@@ -1212,10 +1352,21 @@
 		DUMP_WRITE(&phdr, sizeof(phdr));
 	}
 
+ 	/* write out the notes section */
 	for(i = 0; i < numnote; i++)
 		if (!writenote(&notes[i], file))
 			goto end_coredump;
 
+	/* write out the thread status notes section */
+ 	if (dump_threads)  {
+		list_for_each(t, &thread_list) {
+			struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list);
+			for (i = 0; i < tmp->num_notes; i++)
+				if (!writenote(&tmp->notes[i], file))
+					goto end_coredump;
+		}
+ 	}
+ 
 	DUMP_SEEK(dataoff);
 
 	for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
@@ -1259,11 +1410,24 @@
 		       (off_t) file->f_pos, offset);
 	}
 
- end_coredump:
+end_coredump:
 	set_fs(fs);
+
+cleanup:
+	if (dump_threads)  {
+		while(!list_empty(&thread_list)) {
+			struct list_head *tmp = thread_list.next;
+			list_del(tmp);
+			kfree(list_entry(tmp, struct elf_thread_status, list));
+		}
+
+		resume_threads();
+	}
+
 	up_write(&current->mm->mmap_sem);
 	return has_dumped;
 }
+
 #endif		/* USE_ELF_CORE_DUMP */
 
 static int __init init_elf_binfmt(void)
diff -urN -X dontdiff linux-2.5.14.vannilla/include/asm-i386/elf.h linux2.5.14.tcore/include/asm-i386/elf.h
--- linux-2.5.14.vannilla/include/asm-i386/elf.h	Mon May  6 16:27:38 2002
+++ linux2.5.14.tcore/include/asm-i386/elf.h	Tue May  7 15:01:21 2002
@@ -99,6 +99,16 @@
 
 #ifdef __KERNEL__
 #define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX)
+
+
+extern int dump_task_regs (struct task_struct *, struct pt_regs *);
+extern int dump_task_fpu (struct task_struct *, struct user_i387_struct *);
+extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct *);
+
+#define ELF_CORE_COPY_TASK_REGS(tsk, pt_regs) dump_task_regs(tsk, pt_regs)
+#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
+#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
+
 #endif
 
 #endif
diff -urN -X dontdiff linux-2.5.14.vannilla/include/linux/elf.h linux2.5.14.tcore/include/linux/elf.h
--- linux-2.5.14.vannilla/include/linux/elf.h	Mon May  6 16:27:38 2002
+++ linux2.5.14.tcore/include/linux/elf.h	Tue May  7 15:22:55 2002
@@ -576,6 +576,9 @@
 #define NT_PRPSINFO	3
 #define NT_TASKSTRUCT	4
 #define NT_PRFPXREG	20
+#define NT_PRXFPREG     0x46e62b7f	/* note name must be "LINUX" as per GDB */
+					/* from gdb5.1/include/elf/common.h */
+
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
diff -urN -X dontdiff linux-2.5.14.vannilla/include/linux/elfcore.h linux2.5.14.tcore/include/linux/elfcore.h
--- linux-2.5.14.vannilla/include/linux/elfcore.h	Mon May  6 16:27:38 2002
+++ linux2.5.14.tcore/include/linux/elfcore.h	Tue May  7 15:05:01 2002
@@ -86,4 +86,55 @@
 #define PRARGSZ ELF_PRARGSZ 
 #endif
 
+#ifdef __KERNEL__
+static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
+{
+#ifdef ELF_CORE_COPY_REGS
+	ELF_CORE_COPY_REGS((*elfregs), regs)
+#else
+	if (sizeof(elf_gregset_t) != sizeof(struct pt_regs)) {
+		printk("sizeof(elf_gregset_t) (%ld) != sizeof(struct pt_regs) (%ld)\n",
+			(long)sizeof(elf_gregset_t), (long)sizeof(struct pt_regs));
+	} else
+		*(struct pt_regs *)elfregs = *regs;
+#endif
+}
+
+static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t *elfregs)
+{
+#ifdef ELF_CORE_COPY_TASK_REGS
+	struct pt_regs regs;
+	
+	if (ELF_CORE_COPY_TASK_REGS(t, &regs)) {
+		elf_core_copy_regs(elfregs, &regs);
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
+
+static inline int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
+{
+#ifdef ELF_CORE_COPY_FPREGS
+	return ELF_CORE_COPY_FPREGS(t, fpu);
+#else
+	return dump_fpu(NULL, fpu);
+#endif
+}
+
+static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
+{
+#ifdef ELF_CORE_COPY_XFPREGS
+	return ELF_CORE_COPY_XFPREGS(t, xfpu);
+#else
+	return 0;
+#endif
+}
+
+
+#endif /* __KERNEL__ */
+
+
 #endif /* _LINUX_ELFCORE_H */
diff -urN -X dontdiff linux-2.5.14.vannilla/include/linux/sched.h linux2.5.14.tcore/include/linux/sched.h
--- linux-2.5.14.vannilla/include/linux/sched.h	Mon May  6 16:40:19 2002
+++ linux2.5.14.tcore/include/linux/sched.h	Tue May  7 14:44:09 2002
@@ -130,6 +130,12 @@
 
 #include <linux/spinlock.h>
 
+
+/* functions for pausing and resumming functions common mm's without using signals */
+extern void suspend_threads( void );
+extern void resume_threads( void );
+
+
 /*
  * This serializes "schedule()" and also protects
  * the run-queue from deletions/modifications (but
diff -urN -X dontdiff linux-2.5.14.vannilla/include/linux/sysctl.h linux2.5.14.tcore/include/linux/sysctl.h
--- linux-2.5.14.vannilla/include/linux/sysctl.h	Mon May  6 16:26:18 2002
+++ linux2.5.14.tcore/include/linux/sysctl.h	Tue May  7 14:44:11 2002
@@ -87,6 +87,7 @@
 	KERN_CAP_BSET=14,	/* int: capability bounding set */
 	KERN_PANIC=15,		/* int: panic timeout */
 	KERN_REALROOTDEV=16,	/* real root device to mount after initrd */
+	KERN_CORE_DUMPS_THREADS=17, /* int: include status of others threads in dump */
 
 	KERN_SPARC_REBOOT=21,	/* reboot command on Sparc */
 	KERN_CTLALTDEL=22,	/* int: allow ctl-alt-del to reboot */
diff -urN -X dontdiff linux-2.5.14.vannilla/kernel/sched.c linux2.5.14.tcore/kernel/sched.c
--- linux-2.5.14.vannilla/kernel/sched.c	Sun May  5 23:37:57 2002
+++ linux2.5.14.tcore/kernel/sched.c	Thu May  9 09:26:30 2002
@@ -154,7 +154,8 @@
 	list_t migration_queue;
 } ____cacheline_aligned;
 
-static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
+static struct runqueue runqueues[NR_CPUS + 1] __cacheline_aligned;
+#define PHANTOM_CPU NR_CPUS
 
 #define cpu_rq(cpu)		(runqueues + (cpu))
 #define this_rq()		cpu_rq(smp_processor_id())
@@ -263,6 +264,9 @@
 #ifdef CONFIG_SMP
 	int need_resched, nrpolling;
 
+	if( unlikely(!p->cpus_allowed) )
+			return;
+			
 	preempt_disable();
 	/* minimise the chance of sending an interrupt to poll_idle() */
 	nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
@@ -273,6 +277,9 @@
 		smp_send_reschedule(p->thread_info->cpu);
 	preempt_enable();
 #else
+	// do we need the cpus_allowed test here for core_dumps_threads?
+	//if( unlikely(!p->cpus_allowed) return; // ?
+	
 	set_tsk_need_resched(p);
 #endif
 }
@@ -339,7 +346,7 @@
 	p->state = TASK_RUNNING;
 	if (!p->array) {
 		activate_task(p, rq);
-		if (p->prio < rq->curr->prio)
+		if (p->cpus_allowed && (p->prio < rq->curr->prio) )
 			resched_task(rq->curr);
 		success = 1;
 	}
@@ -996,6 +1003,135 @@
 
 void scheduling_functions_end_here(void) { }
 
+/*
+ * needed for accurate core dumps of multi-threaded applications.
+ * see binfmt_elf.c for more information.
+ */
+static void reschedule_other_cpus(void)
+{
+#ifdef CONFIG_SMP
+	int i, cpu;
+	struct task_struct *p;
+
+	for(i=0; i< smp_num_cpus; i++) {
+		cpu = cpu_logical_map(i);
+		p = cpu_curr(cpu);
+		if (p->thread_info->cpu!= smp_processor_id()) {
+			set_tsk_need_resched(p);
+			smp_send_reschedule(p->thread_info->cpu);
+		}
+	}
+#endif	
+	return;
+}
+
+
+/* functions for pausing and resumming functions with out using signals */
+void suspend_threads(void)
+{
+	unsigned long flags;
+	runqueue_t *phantomQ;
+	task_t *threads[NR_CPUS], *p;
+	int i, OnCPUCount = 0;
+
+//
+// grab all the rq_locks.
+// current is the process dumping core
+//  
+
+	preempt_disable();
+	
+	local_irq_save(flags);
+
+	for(i=0; i< smp_num_cpus; i++) {
+		spin_lock(&cpu_rq(i)->lock);
+	}
+
+	current->cpus_allowed = 1UL << current->thread_info->cpu;
+	// prevent migraion of dumping process making life complicated.
+
+	phantomQ = cpu_rq(PHANTOM_CPU); 
+	spin_lock(&phantomQ->lock);
+	
+	reschedule_other_cpus();
+	// this is an optional IPI, but it makes for the most accurate core files possible.
+	
+	read_lock(&tasklist_lock);
+
+	for_each_task(p) {
+		if (current->mm == p->mm && current != p) {
+			if( p == task_rq(p)->curr ) {
+				//then remember it for later us of set_cpus_allowed
+				threads[OnCPUCount] = p;
+				p->cpus_allowed = 0;//prevent load balance from moving these guys.
+				OnCPUCount ++;
+			} else {
+				// we manualy move the process to the phantom run queue.
+
+				if (p->array) {
+					deactivate_task(p, task_rq(p));
+					activate_task(p, phantomQ);
+				}
+				p->thread_info->cpu = PHANTOM_CPU;
+				p->cpus_allowed = 0;//prevent load balance from moving these guys.
+			}
+		}
+	}
+	read_unlock(&tasklist_lock);
+
+	spin_unlock(&phantomQ->lock);
+	for(i=smp_num_cpus-1; 0<= i; i--) {
+		spin_unlock(&cpu_rq(i)->lock);
+	}
+
+	local_irq_restore(flags);
+
+	for( i = 0; i<OnCPUCount; i++) {
+		set_cpus_allowed(threads[i], 0);
+	}
+	
+}
+
+void resume_threads(void)
+{
+	unsigned long flags;
+	runqueue_t *phantomQ;
+	task_t *p;
+	int i;
+
+	local_irq_save(flags);
+	phantomQ = cpu_rq(PHANTOM_CPU);
+
+	for(i=0; i< smp_num_cpus; i++) {
+		spin_lock(&cpu_rq(i)->lock);
+	}
+	spin_lock(&phantomQ->lock);
+	
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		if (current->mm == p->mm && current != p) {
+			p->cpus_allowed = 1UL << current->thread_info->cpu;
+			if (p->array) {
+				deactivate_task(p,phantomQ);
+				activate_task(p, task_rq(current));
+			}
+			p->thread_info->cpu = current->thread_info->cpu;
+		}
+	}
+
+	read_unlock(&tasklist_lock);
+
+	spin_unlock(&phantomQ->lock);
+	for(i=smp_num_cpus-1; 0<= i; i--) {
+		spin_unlock(&cpu_rq(i)->lock);
+	}
+
+	local_irq_restore(flags);
+	preempt_enable_no_resched();
+}
+
+
+
 void set_user_nice(task_t *p, long nice)
 {
 	unsigned long flags;
@@ -1582,11 +1718,11 @@
 {
 	runqueue_t *rq;
 	int i, j, k;
+	prio_array_t *array;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		runqueue_t *rq = cpu_rq(i);
-		prio_array_t *array;
 
+	for (i = 0; i < NR_CPUS; i++) {
+		rq = cpu_rq(i);
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
 		spin_lock_init(&rq->lock);
@@ -1603,6 +1739,28 @@
 			__set_bit(MAX_PRIO, array->bitmap);
 		}
 	}
+
+ 
+	i = PHANTOM_CPU;
+	rq = cpu_rq(i);
+	rq->active = rq->arrays;
+	rq->expired = rq->arrays + 1;
+	rq->curr = NULL;
+	spin_lock_init(&rq->lock);
+	spin_lock_init(&rq->frozen);
+	INIT_LIST_HEAD(&rq->migration_queue);
+
+	for (j = 0; j < 2; j++) {
+		array = rq->arrays + j;
+		for (k = 0; k < MAX_PRIO; k++) {
+			INIT_LIST_HEAD(array->queue + k);
+			__clear_bit(k, array->bitmap);
+		}
+		// delimiter for bitsearch
+		__set_bit(MAX_PRIO, array->bitmap);
+	}
+
+
 	/*
 	 * We have to do a little magic to get the first
 	 * process right in SMP mode.
@@ -1662,9 +1820,11 @@
 	migration_req_t req;
 	runqueue_t *rq;
 
-	new_mask &= cpu_online_map;
-	if (!new_mask)
-		BUG();
+	if(new_mask){ // can be O for TCore process suspends
+		new_mask &= cpu_online_map;
+		if (!new_mask)
+			BUG();
+	}
 
 	preempt_disable();
 	rq = task_rq_lock(p, &flags);
@@ -1737,7 +1897,12 @@
 		spin_unlock_irqrestore(&rq->lock, flags);
 
 		p = req->task;
-		cpu_dest = __ffs(p->cpus_allowed);
+
+		if( p->cpus_allowed)
+			cpu_dest = __ffs(p->cpus_allowed);
+		else
+			cpu_dest = PHANTOM_CPU;
+
 		rq_dest = cpu_rq(cpu_dest);
 repeat:
 		cpu_src = p->thread_info->cpu;
diff -urN -X dontdiff linux-2.5.14.vannilla/kernel/sysctl.c linux2.5.14.tcore/kernel/sysctl.c
--- linux-2.5.14.vannilla/kernel/sysctl.c	Sun May  5 23:37:54 2002
+++ linux2.5.14.tcore/kernel/sysctl.c	Tue May  7 14:39:37 2002
@@ -38,6 +38,8 @@
 #include <linux/nfs_fs.h>
 #endif
 
+int core_dumps_threads = 0;
+
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
@@ -171,7 +173,9 @@
 	 0644, NULL, &proc_dointvec},
 	{KERN_TAINTED, "tainted", &tainted, sizeof(int),
 	 0644, NULL, &proc_dointvec},
-	{KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t),
+	{KERN_CORE_DUMPS_THREADS, "core_dumps_threads", &core_dumps_threads, sizeof(int),
+	 0644, NULL, &proc_dointvec},
+	 {KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t),
 	 0600, NULL, &proc_dointvec_bset},
 #ifdef CONFIG_BLK_DEV_INITRD
 	{KERN_REALROOTDEV, "real-root-dev", &real_root_dev, sizeof(int),

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] multithreaded coredumps for elf exeecutables for O(1)  scheduler
  2002-05-10 17:13 Manfred Spraul
@ 2002-05-10 15:13 ` Mark Gross
  2002-05-13  7:35   ` Vamsi Krishna S.
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Gross @ 2002-05-10 15:13 UTC (permalink / raw)
  To: Manfred Spraul, linux-kernel

ia64 requires more tweaks than just this to work.  This patch as it stands 
isn't expected to work for ia64.

Getting the register states for the note sections is more involved for ia64 
as well as the avoiding of patch collisions with  the diffs in 
/pub/linux/kernel/ports/ia64/v2.5/ 

I have an ia64 patch set that's partially tested for 2.4.17 and seems to
work.  It didn't get posted as O(1) support became a bigger priority.

I'm hoping to start updating the ia64 patch to support 2.5x very soon.

--mgross

On Friday 10 May 2002 01:13 pm, Manfred Spraul wrote:
> Have you checked that your patch doesn't deadlock on ia64?
>
> > +       /* First pause all related threaded processes */
> > +       if (dump_threads)       {
> > +               suspend_threads();
> > +       }
> > +
> > +       /* now stop all vm operations */
> > +       down_write(&current->mm->mmap_sem);
> > +       segs = current->mm->map_count;
> > +
>
> Stopping all vm operations means that copy_{to,from}_user can cause
> deadlocks.
> ia64 needs copy_to_user in their stack unwind handler, IIRC called by
> ELF_CORE_COPY_REGS.
>
> Afaics you don't handle that. You must dump all thread state before
> down_write(mmap_sem). And I don't see how you protect against 2 threads
> of one process calling suspend_threads() simultaneously.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] multithreaded coredumps for elf exeecutables for O(1)  scheduler
@ 2002-05-10 17:13 Manfred Spraul
  2002-05-10 15:13 ` Mark Gross
  0 siblings, 1 reply; 6+ messages in thread
From: Manfred Spraul @ 2002-05-10 17:13 UTC (permalink / raw)
  To: Mark Gross, linux-kernel

Have you checked that your patch doesn't deadlock on ia64?

> +       /* First pause all related threaded processes */
> +       if (dump_threads)       {
> +               suspend_threads();
> +       }
> +       
> +       /* now stop all vm operations */
> +       down_write(&current->mm->mmap_sem);
> +       segs = current->mm->map_count;
> +
Stopping all vm operations means that copy_{to,from}_user can cause
deadlocks.
ia64 needs copy_to_user in their stack unwind handler, IIRC called by
ELF_CORE_COPY_REGS.

Afaics you don't handle that. You must dump all thread state before
down_write(mmap_sem). And I don't see how you protect against 2 threads
of one process calling suspend_threads() simultaneously.

--
	Manfred

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] multithreaded coredumps for elf exeecutables for O(1) scheduler
  2002-05-10 13:24 [PATCH] multithreaded coredumps for elf exeecutables for O(1) scheduler Mark Gross
@ 2002-05-10 19:06 ` Daniel Jacobowitz
  0 siblings, 0 replies; 6+ messages in thread
From: Daniel Jacobowitz @ 2002-05-10 19:06 UTC (permalink / raw)
  To: Mark Gross; +Cc: linux-kernel, mark.gross, mark, vamsi, efocht

On Fri, May 10, 2002 at 09:24:14AM -0400, Mark Gross wrote:
> 
> Attached is my current patch for creating multithreaded core dump files,
> that works with the O(1) scheduler.
>  
> This is a continuation of the work posted by Vamsi Krishna back on 3/21/02.
> I'm sorry for the delay.  The problem of suspending the other thread
> processes for the duration of the core dump was a challenging problem with
> the O(1) scheduler.
>  
> Most of the patch is the same as that posted on 3/21/02 with some minor
> fixes and the rebasing to the 2.5.14 kernel.  The interesting bits are in
> the additions to sched.c to pause and resume the thread processes under the 
> O(1) scheduler.
>  
> Here I'm leveraging the work of Eric Foct for the process migration, to
> temporarily migrate the thread processes I need suspended to a "phantom
> runqueue".  This is just an additional run queue that has no cpu.  When I'm
> finish with the core dump I migrate them off the phantom run queue and
> continue processing whatever exit processing they do.
>  
> I tried a number of approaches to process pausing that didn't quite work 
> before I settled on the attached implementation.

That's a very interesting approach... I like it.

> This work has been unit test on a 2 way and 4 way SMP systems with no
> lockups so far.  YMMV.
>  
> Note: GDB 5.x will work with the core files created with this patch, provided
> the libpthread that gets loaded at gdb debug time is stripped of symbols. 
> 
> Run strip on your libpthread so files and things should work fine for you.  

Or use GDB 5.2.



-- 
Daniel Jacobowitz                           Carnegie Mellon University
MontaVista Software                         Debian GNU/Linux Developer

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] multithreaded coredumps for elf exeecutables for O(1)  scheduler
  2002-05-10 15:13 ` Mark Gross
@ 2002-05-13  7:35   ` Vamsi Krishna S.
  2002-05-13 14:25     ` Mark Gross
  0 siblings, 1 reply; 6+ messages in thread
From: Vamsi Krishna S. @ 2002-05-13  7:35 UTC (permalink / raw)
  To: mgross, linux-kernel, manfred, vamsi

Hi Mark,

Despite other problems with ia64, stopping vm ops after the status (regs) of
all the threads is captured is a bug fix in generic code, and our
patch effectively nukes that. Not a good thing. I had posted a patch on top
of the previous tcore patch to fix this; I suppose it still applies to the
2.5.x patch. Please apply that (given here).

Vamsi.

Vamsi Krishna S.
Linux Technology Center,
IBM Software Lab, Bangalore.
Ph: +91 80 5262355 Extn: 3959
Internet: vamsi@in.ibm.com

--- 2417-tcore/fs/binfmt_elf.c.ori	Thu Mar 21 15:30:08 2002
+++ 2417-tcore/fs/binfmt_elf.c	Thu Mar 21 15:27:29 2002
@@ -1289,10 +1289,6 @@
 	int dump_threads = 0;
 	int thread_status_size = 0;
 	
-	/* now stop all vm operations */
-	down_write(&current->mm->mmap_sem);
-	segs = current->mm->map_count;
-
  	if (atomic_read(&current->mm->mm_users) != 1) {
 		dump_threads = core_dumps_threads;
 	}
@@ -1337,6 +1333,19 @@
 		}
 	} /* End if(dump_threads) */
 
+	/*
+	 * This transfers the registers from regs into the standard
+	 * coredump arrangement, whatever that is. We need to do this
+	 * before acquiring mmap_sem as on some architectures (IA64)
+	 * we may need to access user pages to get register state.
+	 */
+	memset(&prstatus, 0, sizeof(prstatus));
+	elf_core_copy_regs(&prstatus.pr_reg, regs);
+
+	/* now stop all vm operations */
+	down_write(&current->mm->mmap_sem);
+	segs = current->mm->map_count;
+
 #ifdef DEBUG
 	printk("elf_core_dump: %d segs %lu limit\n", segs, limit);
 #endif
@@ -1358,16 +1367,9 @@
 	 * Set up the notes in similar form to SVR4 core dumps made
 	 * with info from their /proc.
 	 */
-	memset(&prstatus, 0, sizeof(prstatus));
 	fill_prstatus(&prstatus, current, signr);
 	fill_note(&notes[0], "CORE", NT_PRSTATUS, sizeof(prstatus), &prstatus);
 
-	/*
-	 * This transfers the registers from regs into the standard
-	 * coredump arrangement, whatever that is.
-	 */
-	elf_core_copy_regs(&prstatus.pr_reg, regs);
-
 #ifdef DEBUG
 	dump_regs("Passed in regs", (elf_greg_t *)regs);
 	dump_regs("prstatus regs", (elf_greg_t *)&prstatus.pr_reg);



On Fri, 10 May 2002 23:46:59 +0530, Mark Gross wrote:

> ia64 requires more tweaks than just this to work.  This patch as it stands isn't
> expected to work for ia64.
> 
> Getting the register states for the note sections is more involved for ia64 as
> well as the avoiding of patch collisions with  the diffs in
> /pub/linux/kernel/ports/ia64/v2.5/
> 
> I have an ia64 patch set thats partially tested for 2.4.17, that seems to work.
> It didn't get posted as O(1) support became a bigger priority.
> 
> I'm hoping to start updating the ia64 patch to support 2.5x very soon.
> 
> --mgross
> 
> On Friday 10 May 2002 01:13 pm, Manfred Spraul wrote:
>> Have you checked that your patch doesn't deadlock on ia64?
>>
>> > +       /* First pause all related threaded processes */ +       if
>> > (dump_threads)       {
>> > +               suspend_threads();
>> > +       }
>> > +
>> > +       /* now stop all vm operations */
>> > +       down_write(&current->mm->mmap_sem); +       segs =
>> > current->mm->map_count;
>> > +
>>
>> Stopping all vm operations means that copy_{to,from}_user can cause deadlocks.
>> ia64 needs copy_to_user in their stack unwind handler, IIRC called by
>> ELF_CORE_COPY_REGS.
>>
>> Afaics you don't handle that. You must dump all thread state before
>> down_write(mmap_sem). And I don't see how you protect against 2 threads of one
>> process calling suspend_threads() simultaneously.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the
> body of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html Please read the FAQ at
> http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] multithreaded coredumps for elf exeecutables for O(1)  scheduler
  2002-05-13  7:35   ` Vamsi Krishna S.
@ 2002-05-13 14:25     ` Mark Gross
  0 siblings, 0 replies; 6+ messages in thread
From: Mark Gross @ 2002-05-13 14:25 UTC (permalink / raw)
  To: vamsi_krishna, linux-kernel, manfred, vamsi

Thanks Vamsi, 

I'll add this to the next version of the patch.  

I've gotten a few comments about function names being too generic, and the 
fact that I'm holding too many locks in the resume_threads function.   Minor 
stuff, but I want to clean that up as well. 

--mgross

On Monday 13 May 2002 03:35 am, Vamsi Krishna S. wrote:
> Hi Mark,
>
> Despite other problems with ia64, stopping vm ops after the status (regs)
> of all the threads is captured, is a bug fix in generic code, and our patch
> effectively nukes that. Not a good thing. I had posted a patch on top of
> the previous tcore patch to fix this, I suppose it still applies to the
> 2.5.x patch. Please apply that (given here).
>
> Vamsi.
>
> Vamsi Krishna S.
> Linux Technology Center,
> IBM Software Lab, Bangalore.
> Ph: +91 80 5262355 Extn: 3959
> Internet: vamsi@in.ibm.com
>
> --- 2417-tcore/fs/binfmt_elf.c.ori	Thu Mar 21 15:30:08 2002
> +++ 2417-tcore/fs/binfmt_elf.c	Thu Mar 21 15:27:29 2002
> @@ -1289,10 +1289,6 @@
>  	int dump_threads = 0;
>  	int thread_status_size = 0;
>
> -	/* now stop all vm operations */
> -	down_write(&current->mm->mmap_sem);
> -	segs = current->mm->map_count;
> -
>   	if (atomic_read(&current->mm->mm_users) != 1) {
>  		dump_threads = core_dumps_threads;
>  	}
> @@ -1337,6 +1333,19 @@
>  		}
>  	} /* End if(dump_threads) */
>
> +	/*
> +	 * This transfers the registers from regs into the standard
> +	 * coredump arrangement, whatever that is. We need to do this
> +	 * before acquiring mmap_sem as on some architectures (IA64)
> +	 * we may need to access user pages to get register state.
> +	 */
> +	memset(&prstatus, 0, sizeof(prstatus));
> +	elf_core_copy_regs(&prstatus.pr_reg, regs);
> +
> +	/* now stop all vm operations */
> +	down_write(&current->mm->mmap_sem);
> +	segs = current->mm->map_count;
> +
>  #ifdef DEBUG
>  	printk("elf_core_dump: %d segs %lu limit\n", segs, limit);
>  #endif
> @@ -1358,16 +1367,9 @@
>  	 * Set up the notes in similar form to SVR4 core dumps made
>  	 * with info from their /proc.
>  	 */
> -	memset(&prstatus, 0, sizeof(prstatus));
>  	fill_prstatus(&prstatus, current, signr);
>  	fill_note(&notes[0], "CORE", NT_PRSTATUS, sizeof(prstatus), &prstatus);
>
> -	/*
> -	 * This transfers the registers from regs into the standard
> -	 * coredump arrangement, whatever that is.
> -	 */
> -	elf_core_copy_regs(&prstatus.pr_reg, regs);
> -
>  #ifdef DEBUG
>  	dump_regs("Passed in regs", (elf_greg_t *)regs);
>  	dump_regs("prstatus regs", (elf_greg_t *)&prstatus.pr_reg);
>
> On Fri, 10 May 2002 23:46:59 +0530, Mark Gross wrote:
> > ia64 requires more tweaks than just this to work.  This patch as it
> > stands isn't expected to work for ia64.
> >
> > Getting the register states for the note sections is more involved for
> > ia64 as well as the avoiding of patch collisions with  the diffs in
> > /pub/linux/kernel/ports/ia64/v2.5/
> >
> > I have an ia64 patch set thats partially tested for 2.4.17, that seems to
> > work. It didn't get posted as O(1) support became a bigger priority.
> >
> > I'm hoping to start updating the ia64 patch to support 2.5x very soon.
> >
> > --mgross
> >
> > On Friday 10 May 2002 01:13 pm, Manfred Spraul wrote:
> >> Have you checked that your patch doesn't deadlock on ia64?
> >>
> >> > +       /* First pause all related threaded processes */ +       if
> >> > (dump_threads)       {
> >> > +               suspend_threads();
> >> > +       }
> >> > +
> >> > +       /* now stop all vm operations */
> >> > +       down_write(&current->mm->mmap_sem); +       segs =
> >> > current->mm->map_count;
> >> > +
> >>
> >> Stopping all vm operations means that copy_{to,from}_user can cause
> >> deadlocks. ia64 needs copy_to_user in their stack unwind handler, IIRC
> >> called by ELF_CORE_COPY_REGS.
> >>
> >> Afaics you don't handle that. You must dump all thread state before
> >> down_write(mmap_sem). And I don't see how you protect against 2 threads
> >> of one process calling suspend_threads() simultaneously.
> >
> > -
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel"
> > in the body of a message to majordomo@vger.kernel.org More majordomo info
> > at http://vger.kernel.org/majordomo-info.html Please read the FAQ at
> > http://www.tux.org/lkml/
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2002-05-13 17:25 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-05-10 13:24 [PATCH] multithreaded coredumps for elf exeecutables for O(1) scheduler Mark Gross
2002-05-10 19:06 ` Daniel Jacobowitz
  -- strict thread matches above, loose matches on Subject: below --
2002-05-10 17:13 Manfred Spraul
2002-05-10 15:13 ` Mark Gross
2002-05-13  7:35   ` Vamsi Krishna S.
2002-05-13 14:25     ` Mark Gross

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox