All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pete Zaitcev <zaitcev@redhat.com>
To: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>, linux-kernel@vger.kernel.org
Subject: Re: 2.4.19-pre3 s390 patch for hwc_con.c
Date: Mon, 18 Mar 2002 18:31:00 -0500	[thread overview]
Message-ID: <20020318183100.A15251@devserv.devel.redhat.com> (raw)
In-Reply-To: <OF5991CEBE.CF23F14A-ONC1256B80.0042A9FE@de.ibm.com>

> To: Pete Zaitcev <zaitcev@redhat.com>
> Cc: linux-kernel@vger.kernel.org
> From: "Martin Schwidefsky" <schwidefsky@de.ibm.com>
> Date: Mon, 18 Mar 2002 15:03:49 +0100

>[...]
> Yep makes sense as well. I actually made the same fix for the kernel
> 2.5.6. I have it basically going but SMP is still a bit broken. It
> hangs on boot with 5 cpus. Seems like the startup of the migration
> threads doesn't complete because load_balance isn't balancing...

Darn, I re-invented the wheel again, it seems.
Attached is the patch to have O(1) on 2.4.18-RH (I backported
some of your things from 2.4.19-pre3). I am not sure if it works
right at all. I would like to see yours, unless it's already in
Linus' tree.

-- Pete

diff -urN -X dontdiff linux-2.4.17-0.18/arch/s390/kernel/process.c linux-2.4.17-0.18-t1/arch/s390/kernel/process.c
--- linux-2.4.17-0.18/arch/s390/kernel/process.c	Mon Feb 18 20:02:26 2002
+++ linux-2.4.17-0.18-t1/arch/s390/kernel/process.c	Tue Feb 19 03:37:28 2002
@@ -52,12 +52,8 @@
 
 static psw_t wait_psw;
 
-int cpu_idle(void *unused)
+int cpu_idle(void)
 {
-	/* endless idle loop with no priority at all */
-        init_idle();
-	current->nice = 20;
-	current->counter = -100;
 	wait_psw.mask = _WAIT_PSW_MASK;
 	wait_psw.addr = (unsigned long) &&idle_wakeup | 0x80000000L;
 	while(1) {
@@ -82,7 +78,7 @@
 {
 	struct task_struct *tsk = current;
 
-        printk("CPU:    %d    %s\n", tsk->processor, print_tainted());
+	printk("CPU:    %d    %s\n", tsk->cpu, print_tainted());
         printk("Process %s (pid: %d, task: %08lx, ksp: %08x)\n",
 	       current->comm, current->pid, (unsigned long) tsk,
 	       tsk->thread.ksp);
diff -urN -X dontdiff linux-2.4.17-0.18/arch/s390/kernel/s390_ksyms.c linux-2.4.17-0.18-t1/arch/s390/kernel/s390_ksyms.c
--- linux-2.4.17-0.18/arch/s390/kernel/s390_ksyms.c	Mon Feb 18 20:05:29 2002
+++ linux-2.4.17-0.18-t1/arch/s390/kernel/s390_ksyms.c	Wed Feb 20 02:05:42 2002
@@ -11,6 +11,7 @@
 #if CONFIG_IP_MULTICAST
 #include <net/arp.h>
 #endif
+#include <linux/sched.h>	/* XXX sys_shed_yield - broken */
 
 /*
  * memory management
@@ -60,3 +60,4 @@
 EXPORT_SYMBOL(console_mode);
 EXPORT_SYMBOL(console_device);
 EXPORT_SYMBOL_NOVERS(do_call_softirq);
+EXPORT_SYMBOL_NOVERS(sys_sched_yield);	/* XXX */
diff -urN -X dontdiff linux-2.4.17-0.18/arch/s390/kernel/smp.c linux-2.4.17-0.18-t1/arch/s390/kernel/smp.c
--- linux-2.4.17-0.18/arch/s390/kernel/smp.c	Mon Feb 18 20:02:26 2002
+++ linux-2.4.17-0.18-t1/arch/s390/kernel/smp.c	Tue Feb 19 22:02:05 2002
@@ -38,7 +38,7 @@
 #include <asm/cpcmd.h>
 
 /* prototypes */
-extern int cpu_idle(void * unused);
+extern int cpu_idle(void);
 
 extern __u16 boot_cpu_addr;
 extern volatile int __cpu_logical_map[];
@@ -49,13 +49,13 @@
 static int       max_cpus = NR_CPUS;	  /* Setup configured maximum number of CPUs to activate	*/
 int              smp_num_cpus;
 struct _lowcore *lowcore_ptr[NR_CPUS];
-cycles_t         cacheflush_time=0;
 int              smp_threads_ready=0;      /* Set when the idlers are all forked. */
 static atomic_t  smp_commenced = ATOMIC_INIT(0);
 
 spinlock_t       kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 unsigned long	 cpu_online_map;
+unsigned long	cache_decay_ticks;
 
 /*
  *      Setup routine for controlling SMP activation
@@ -95,6 +95,8 @@
 static sigp_ccode smp_ext_bitcall(int, ec_bit_sig);
 static void smp_ext_bitcall_others(ec_bit_sig);
 
+static void do_task_migration(void);
+
 /*
  * Structure and data for smp_call_function(). This is designed to minimise
  * static memory requirements. It also looks cleaner.
@@ -259,6 +261,8 @@
 		do_machine_power_off();
 	if (test_bit(ec_call_function, &bits)) 
 		do_call_function();
+	if (test_bit(ec_task_migration, &bits))
+		do_task_migration();
 }
 
 /*
@@ -361,6 +365,35 @@
 	local_flush_tlb();
 }
 
+static spinlock_t migration_lock = SPIN_LOCK_UNLOCKED;
+static task_t *new_task;
+
+/*
+ * Task migration callback.
+ */
+static void do_task_migration(void)
+{
+	task_t *p;
+
+	p = new_task;
+	spin_unlock(&migration_lock);
+	sched_task_migrated(p);
+}
+
+/*
+ * This function sends a 'task migration' IPI to another CPU.
+ * Must be called from syscall contexts, with interrupts *enabled*.
+ */
+void smp_migrate_task(int cpu, task_t *p)
+{
+	/*
+	 * The target CPU will unlock the migration spinlock:
+	 */
+	spin_lock(&migration_lock);
+	new_task = p;
+	smp_ext_bitcall(cpu, ec_task_migration);
+}
+
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
@@ -372,6 +405,19 @@
         smp_ext_bitcall(cpu, ec_schedule);
 }
 
+#if 0 /* not used in current mingo code */
+/*
+ * this function sends a reschedule IPI to all (other) CPUs.
+ * This should only be used if some 'global' task became runnable,
+ * such as a RT task, that must be handled now. The first CPU
+ * that manages to grab the task will run it.
+ */
+void smp_send_reschedule_all(void)
+{
+	send_IPI_allbutself(RESCHEDULE_VECTOR);
+}
+#endif
+
 /*
  * parameter area for the set/clear control bit callbacks
  */
@@ -449,7 +495,7 @@
 {
         int curr_cpu;
 
-        current->processor = 0;
+        current->cpu = 0;
         smp_num_cpus = 1;
 	cpu_online_map = 1;
         for (curr_cpu = 0;
@@ -490,7 +536,7 @@
 	pfault_init();
 #endif
         /* cpu_idle will call schedule for us */
-        return cpu_idle(NULL);
+        return cpu_idle();
 }
 
 /*
@@ -528,12 +574,9 @@
         idle = init_task.prev_task;
         if (!idle)
                 panic("No idle process for CPU %d",cpu);
-        idle->processor = cpu;
-	idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */
+	init_idle(idle, cpu);
 
-        del_from_runqueue(idle);
         unhash_process(idle);
-        init_tasks[cpu] = idle;
 
         cpu_lowcore=&get_cpu_lowcore(cpu);
 	cpu_lowcore->save_area[15] = idle->thread.ksp;
@@ -585,7 +628,9 @@
                 panic("Couldn't request external interrupt 0x1202");
         smp_count_cpus();
         memset(lowcore_ptr,0,sizeof(lowcore_ptr));  
-        
+
+	cache_decay_ticks = (200 * HZ) / 1000;	/* Is 200ms ok? Robust? XXX */
+
         /*
          *      Initialize the logical to physical CPU number mapping
          */
diff -urN -X dontdiff linux-2.4.17-0.18/arch/s390/kernel/traps.c linux-2.4.17-0.18-t1/arch/s390/kernel/traps.c
--- linux-2.4.17-0.18/arch/s390/kernel/traps.c	Mon Feb 18 20:02:26 2002
+++ linux-2.4.17-0.18-t1/arch/s390/kernel/traps.c	Tue Feb 19 21:05:15 2002
@@ -135,12 +135,14 @@
 
 void show_trace_task(struct task_struct *tsk)
 {
+#if 0 /* Mingo's scheduler kills task_has_cpu, so we bite the bullet. */
 	/*
 	 * We can't print the backtrace of a running process. It is
 	 * unreliable at best and can cause kernel oopses.
 	 */
 	if (task_has_cpu(tsk))
 		return;
+#endif
 	show_trace((unsigned long *) tsk->thread.ksp);
 }
 
diff -urN -X dontdiff linux-2.4.17-0.18/arch/s390/mm/fault.c linux-2.4.17-0.18-t1/arch/s390/mm/fault.c
--- linux-2.4.17-0.18/arch/s390/mm/fault.c	Fri Nov  9 22:58:02 2001
+++ linux-2.4.17-0.18-t1/arch/s390/mm/fault.c	Tue Feb 19 03:09:46 2002
@@ -285,8 +285,7 @@
 out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (tsk->pid == 1) {
-		tsk->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN -X dontdiff linux-2.4.17-0.18/arch/s390x/kernel/process.c linux-2.4.17-0.18-t1/arch/s390x/kernel/process.c
--- linux-2.4.17-0.18/arch/s390x/kernel/process.c	Mon Feb 18 20:02:28 2002
+++ linux-2.4.17-0.18-t1/arch/s390x/kernel/process.c	Tue Feb 19 03:36:16 2002
@@ -52,12 +52,9 @@
 
 static psw_t wait_psw;
 
-int cpu_idle(void *unused)
+int cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
-        init_idle();
-	current->nice = 20;
-	current->counter = -100;
 	wait_psw.mask = _WAIT_PSW_MASK;
 	wait_psw.addr = (unsigned long) &&idle_wakeup;
 	while(1) {
diff -urN -X dontdiff linux-2.4.17-0.18/arch/s390x/kernel/smp.c linux-2.4.17-0.18-t1/arch/s390x/kernel/smp.c
--- linux-2.4.17-0.18/arch/s390x/kernel/smp.c	Mon Feb 18 20:02:28 2002
+++ linux-2.4.17-0.18-t1/arch/s390x/kernel/smp.c	Tue Feb 19 03:36:43 2002
@@ -38,7 +38,7 @@
 #include <asm/cpcmd.h>
 
 /* prototypes */
-extern int cpu_idle(void * unused);
+extern int cpu_idle(void);
 
 extern __u16 boot_cpu_addr;
 extern volatile int __cpu_logical_map[];
@@ -468,8 +468,9 @@
 	/* Enable pfault pseudo page faults on this cpu. */
 	pfault_init();
 #endif
+	init_idle();
         /* cpu_idle will call schedule for us */
-        return cpu_idle(NULL);
+        return cpu_idle();
 }
 
 /*
diff -urN -X dontdiff linux-2.4.17-0.18/arch/s390x/mm/fault.c linux-2.4.17-0.18-t1/arch/s390x/mm/fault.c
--- linux-2.4.17-0.18/arch/s390x/mm/fault.c	Fri Nov  9 22:58:02 2001
+++ linux-2.4.17-0.18-t1/arch/s390x/mm/fault.c	Tue Feb 19 03:10:28 2002
@@ -286,8 +286,7 @@
 out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (tsk->pid == 1) {
-		tsk->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN -X dontdiff linux-2.4.17-0.18/include/asm-s390/bitops.h linux-2.4.17-0.18-t1/include/asm-s390/bitops.h
--- linux-2.4.17-0.18/include/asm-s390/bitops.h	Wed Jul 25 23:12:02 2001
+++ linux-2.4.17-0.18-t1/include/asm-s390/bitops.h	Tue Feb 19 21:06:47 2002
@@ -752,6 +752,68 @@
 }
 
 /*
+ * Yet Another Bitop for mingo's scheduler. Don't use!
+ * Result is undefined if no bit exists, so code should check against 0 first.
+ *
+ * XXX Measure if this actually needs any optimization.
+ */
+static __inline__ int __ffs(unsigned long word)
+{
+	int num = 0;
+
+	if ((word & 0xffff) == 0) {
+		num += 16;
+		word >>= 16;
+	}
+	if ((word & 0xff) == 0) {
+		num += 8;
+		word >>= 8;
+	}
+	if ((word & 0xf) == 0) {
+		num += 4;
+		word >>= 4;
+	}
+	if ((word & 0x3) == 0) {
+		num += 2;
+		word >>= 2;
+	}
+	if ((word & 0x1) == 0)
+		num += 1;
+	return num;
+}
+
+/*
+ * Scheduler induced bitop, better not to use.
+ * The Ingo's brilliant design puts its find_first()
+ * counterpart into <asm/mmu_context.h>.
+ * Some implementations use void *addr as argument, which is
+ * totally broken, because we must be compatible with set_bit().
+ * The size is likely to be 140, but the map is padded with zeroes.
+ *
+ * find_next_bit - find the first set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static __inline__ int find_next_bit(unsigned long *addr, int size, int offset)
+{
+	unsigned long *p = addr + (offset >> 5);
+	int num = offset & ~0x1f;
+	unsigned long word;
+
+	word = *p++;
+	word &= ~((1 << (offset & 0x1f)) - 1);
+	while (num < size) {
+		if (word != 0) {
+			return __ffs(word) + num;
+		}
+		word = *p++;
+		num += 0x20;
+	}
+	return num;
+}
+
+/*
  * hweightN: returns the hamming weight (i.e. the number
  * of bits set) of a N-bit word
  */
diff -urN -X dontdiff linux-2.4.17-0.18/include/asm-s390/mmu_context.h linux-2.4.17-0.18-t1/include/asm-s390/mmu_context.h
--- linux-2.4.17-0.18/include/asm-s390/mmu_context.h	Tue Feb 13 23:13:44 2001
+++ linux-2.4.17-0.18-t1/include/asm-s390/mmu_context.h	Tue Feb 19 06:03:42 2002
@@ -10,6 +10,25 @@
 #define __S390_MMU_CONTEXT_H
 
 /*
+ * Every architecture must define this function. It's the fastest
+ * way of searching a 140-bit bitmap where the first 100 bits are
+ * unlikely to be set. It's guaranteed that at least one of the 140
+ * bits is cleared.
+ */
+static inline int sched_find_first_bit(unsigned long *b)
+{
+	if (unlikely(b[0]))
+		return __ffs(b[0]);
+	if (unlikely(b[1]))
+		return __ffs(b[1]) + 32;
+	if (unlikely(b[2]))
+		return __ffs(b[2]) + 64;
+	if (b[3])
+		return __ffs(b[3]) + 96;
+	return __ffs(b[4]) + 128;
+}
+
+/*
  * get a new mmu context.. S390 don't know about contexts.
  */
 #define init_new_context(tsk,mm)        0
diff -urN -X dontdiff linux-2.4.17-0.18/include/asm-s390/sigp.h linux-2.4.17-0.18-t1/include/asm-s390/sigp.h
--- linux-2.4.17-0.18/include/asm-s390/sigp.h	Mon Feb 18 21:27:30 2002
+++ linux-2.4.17-0.18-t1/include/asm-s390/sigp.h	Tue Feb 19 21:07:33 2002
@@ -63,6 +63,7 @@
         ec_halt,
         ec_power_off,
 	ec_call_function,
+	ec_task_migration,
 	ec_bit_last
 } ec_bit_sig;
 
diff -urN -X dontdiff linux-2.4.17-0.18/include/asm-s390/smp.h linux-2.4.17-0.18-t1/include/asm-s390/smp.h
--- linux-2.4.17-0.18/include/asm-s390/smp.h   Mon Feb 18 21:27:34 2002
+++ linux-2.4.17-0.18-t1/include/asm-s390/smp.h        Tue Feb 19 21:07:36 2002
@@ -30,19 +30,7 @@
 
 #define NO_PROC_ID		0xFF		/* No processor magic marker */
 
-/*
- *	This magic constant controls our willingness to transfer
- *	a process across CPUs. Such a transfer incurs misses on the L1
- *	cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
- *	gut feeling is this will vary by board in value. For a board
- *	with separate L2 cache it probably depends also on the RSS, and
- *	for a board with shared L2 cache it ought to decay fast as other
- *	processes are run.
- */
- 
-#define PROC_CHANGE_PENALTY	20		/* Schedule penalty */
-
-#define smp_processor_id() (current->processor)
+#define smp_processor_id() (current->cpu)
 
 extern __inline__ int cpu_logical_map(int cpu)
 {
diff -ur -X dontdiff linux-2.4.18-0.1.s390/include/asm-s390x/smp.h linux-2.4.18-0.1-x.s390/include/asm-s390x/smp.h
--- linux-2.4.18-0.1.s390/include/asm-s390x/smp.h	Thu Oct 11 09:43:38 2001
+++ linux-2.4.18-0.1-x.s390/include/asm-s390x/smp.h	Wed Mar  6 21:17:30 2002
@@ -30,19 +30,7 @@
 
 #define NO_PROC_ID		0xFF		/* No processor magic marker */
 
-/*
- *	This magic constant controls our willingness to transfer
- *	a process across CPUs. Such a transfer incurs misses on the L1
- *	cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
- *	gut feeling is this will vary by board in value. For a board
- *	with separate L2 cache it probably depends also on the RSS, and
- *	for a board with shared L2 cache it ought to decay fast as other
- *	processes are run.
- */
- 
-#define PROC_CHANGE_PENALTY	20		/* Schedule penalty */
-
-#define smp_processor_id() (current->processor)
+#define smp_processor_id() (current->cpu)
 
 extern __inline__ int cpu_logical_map(int cpu)
 {
diff -ur -X dontdiff linux-2.4.18-0.1.s390/arch/s390/kernel/entry.S linux-2.4.18-0.1-x.s390/arch/s390/kernel/entry.S
--- linux-2.4.18-0.1.s390/arch/s390/kernel/entry.S	Mon Feb 25 11:37:56 2002
+++ linux-2.4.18-0.1-x.s390/arch/s390/kernel/entry.S	Wed Mar  6 19:18:22 2002
@@ -291,17 +289,16 @@
 ret_from_fork:  
         basr    %r13,0
         l       %r13,.Lentry_base-.(%r13)  # setup base pointer to &entry_base
+#ifdef CONFIG_SMP
+	# not saving R14 here because we go to sysc_return ultimately
+	l	%r1,BASED(.Lschedtail)
+	basr	%r14,%r1          # call schedule_tail (unlock stuff)
+#endif
         GET_CURRENT               # load pointer to task_struct to R9
         stosm   24(%r15),0x03     # reenable interrupts
         sr      %r0,%r0           # child returns 0
         st      %r0,SP_R2(%r15)   # store return value (change R2 on stack)
-#ifdef CONFIG_SMP
-        l       %r1,BASED(.Lschedtail)
-	la      %r14,BASED(sysc_return)
-        br      %r1               # call schedule_tail, return to sysc_return
-#else
         b       BASED(sysc_return)
-#endif
 
 #
 # clone, fork, vfork, exec and sigreturn need glue,
diff -ur -X dontdiff linux-2.4.18-0.1.s390/arch/s390x/kernel/entry.S linux-2.4.18-0.1-x.s390/arch/s390x/kernel/entry.S
--- linux-2.4.18-0.1.s390/arch/s390x/kernel/entry.S	Mon Feb 25 11:37:56 2002
+++ linux-2.4.18-0.1-x.s390/arch/s390x/kernel/entry.S	Wed Mar  6 19:41:58 2002
@@ -277,15 +275,13 @@
 #
         .globl  ret_from_fork
 ret_from_fork:  
+#ifdef CONFIG_SMP
+	brasl   %r14,schedule_tail
+#endif
         GET_CURRENT               # load pointer to task_struct to R9
         stosm   48(%r15),0x03     # reenable interrupts
 	xc      SP_R2(8,%r15),SP_R2(%r15) # child returns 0
-#ifdef CONFIG_SMP
-	larl    %r14,sysc_return
-        jg      schedule_tail     # return to sysc_return
-#else
         j       sysc_return
-#endif
 
 #
 # clone, fork, vfork, exec and sigreturn need glue,

  reply	other threads:[~2002-03-18 23:31 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-03-18 14:03 2.4.19-pre3 s390 patch for hwc_con.c Martin Schwidefsky
2002-03-18 23:31 ` Pete Zaitcev [this message]
  -- strict thread matches above, loose matches on Subject: below --
2002-03-15 18:29 Pete Zaitcev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20020318183100.A15251@devserv.devel.redhat.com \
    --to=zaitcev@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=schwidefsky@de.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.