All of lore.kernel.org
 help / color / mirror / Atom feed
* Hang with fair cgroup scheduler (reproducer is attached.)
@ 2007-12-14  7:18 KAMEZAWA Hiroyuki
       [not found] ` <20071214161834.034e6efe.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-14  7:18 UTC (permalink / raw)
  To: containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org
  Cc: Andrew Morton, mingo-X9Un+BFzKDI,
	vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

[-- Attachment #1: Type: text/plain, Size: 742 bytes --]

Hi,

While I was testing 2.6.24-rc5-mm1's fair group scheduler (with cgroup),
the system hung. Please confirm; it is reproducible on my box.

My test program is attached.

What happens:
  the system hangs. (panic ?)

Environ:
  ia64/NUMA 8CPU systems. 4 cpus per node.

How to reproduce:
  Compile attached one.
  # gcc -o reg reg.c
  Create group as following
  # mount -t cgroup none /opt/cgroup -o cpu
  # mkdir /opt/cgroup/group_1
  # mkdir /opt/cgroup/group_2
  
  And run attached program
  # ./reg 8 8

What 'reg' does:
  usage : reg A B C...
  This program forks child processes and assigns
     A processes to group_1
     B processes to group_2
     C processes to group_3
  then kicks them all, waits for them with waitpid, and repeats.

Thanks,
-Kame

[-- Attachment #2: reg.c --]
[-- Type: text/x-csrc, Size: 2301 bytes --]

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <sys/types.h>
#include <unistd.h>
#include <sched.h>
#include <asm/intrinsics.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/wait.h>
#include <errno.h>
#include <sys/times.h>

/* Base address of the SysV shared memory segment attached in main(). */
static char *shared;
/* Capacity of the pids[] and groups[] arrays in main(). */
#define MAX_PROCS 32
/* Size in bytes requested from shmget() for the shared segment. */
#define SHMSIZE	(16384)

/* Layout of the start of the shared segment, as used by main() and worker(). */
struct start_stop {
	int go;		/* set to 1 by main() to release the spinning workers */
};

/*
 * Assign PID to a group.
 * Works like: # echo PID > /opt/cgroup/group_%d/tasks
 *
 * pid:   process ID to write into the tasks file.
 * group: group number; selects /opt/cgroup/group_<group>/tasks.
 *
 * Exits the program with a non-zero status if the path cannot be built or
 * the tasks file cannot be opened.  A failed write is reported on stderr
 * and the function returns without printing the "<pid> to <path>"
 * confirmation line.
 */
void assign_to(int pid, int group)
{
	FILE *fp;
	char buf[64];
	int len;

	/* Bounded formatting: the path cannot overrun buf for any int group. */
	len = snprintf(buf, sizeof(buf), "/opt/cgroup/group_%d/tasks", group);
	if (len < 0 || (size_t)len >= sizeof(buf)) {
		fprintf(stderr, "failed : path too long for group %d\n", group);
		exit(1);
	}
	fp = fopen(buf, "w");
	if (fp == NULL) {
		perror("fopen");
		fprintf(stderr, "failed : fopen %s\n", buf);
		exit(1);
	}
	/*
	 * stdio buffers the PID, so the write to the tasks file may only be
	 * attempted (and fail) inside fclose(); check both calls.
	 */
	if (fprintf(fp, "%d", pid) < 0) {
		perror("fprintf");
		fclose(fp);
		return;
	}
	if (fclose(fp) != 0) {
		perror("fclose");
		return;
	}
	printf("%d to %s\n", pid, buf);
}

/*
 * Child-side body: spin until the parent sets the shared "go" flag.
 * Each pass yields the CPU with sched_yield() and then issues ia64_mf()
 * before the flag in the shared segment is read again.
 *
 * id: worker index passed by main(); currently unused.
 *
 * Returns 0 once the flag has been observed as non-zero.
 */
int worker(int id)
{
	struct start_stop *shared_flag;

	(void)id;
	shared_flag = (struct start_stop*)shared;
	do  {
		sched_yield();
		ia64_mf();
	} while (!shared_flag->go);

	return 0;
}

/*
 * Usage: one argument per group, giving the number of processes to place
 * in that group, e.g.
 *   2 procs to group 1, 3 procs to group 2        -> # ./a.out 2 3
 *   3 procs each to group 1, group 2 and group 3  -> # ./a.out 3 3 3
 * At most MAX_PROCS processes in total are supported; arguments that are
 * not plain non-negative numbers, or that push the total past MAX_PROCS,
 * are rejected with a non-zero exit status.
 *
 * Each round forks the workers, moves every worker into its group while it
 * spins, releases them through the shared "go" flag, reaps them, and then
 * starts over.  The loop never terminates on its own.
 */

int main(int argc, char *argv[])
{
	int nprocs = 0;
	int shmid, i;
	struct start_stop *shared_flag;
	int pids[MAX_PROCS] = {0};
	int groups[MAX_PROCS] = {0};

	/*
	 * The group layout depends only on argv, so build it once.  Every
	 * count is validated so that groups[] and pids[] cannot be overrun.
	 */
	for (i = 1; i < argc; i++) {
		char *end;
		long num;
		int j;

		errno = 0;
		num = strtol(argv[i], &end, 10);
		if (errno != 0 || end == argv[i] || *end != '\0' ||
		    num < 0 || num > MAX_PROCS - nprocs) {
			fprintf(stderr,
				"bad process count '%s' (total must be 0..%d)\n",
				argv[i], MAX_PROCS);
			exit(1);
		}
		for (j = 0; j < num; j++)
			groups[nprocs + j] = i;
		nprocs += (int)num;
	}

	for (;;) {
		shmid = shmget(IPC_PRIVATE, SHMSIZE, IPC_CREAT | 0666);
		if (shmid == -1) {
			perror("shmget");
			exit(1);
		}

		shared = shmat(shmid, NULL, 0);
		if (shared == (void *)-1) {
			perror("shmat");
			shmctl(shmid, IPC_RMID, 0);
			exit(1);
		}
		shared_flag = (struct start_stop *)shared;

		memset(shared, 0, SHMSIZE);
		/* Mark for removal now; the segment stays usable while attached. */
		shmctl(shmid, IPC_RMID, 0);

		for (i = 0; i < nprocs; i++) {
			int ret;
			ret = fork();
			if (ret == 0) {
				worker(i);
				exit(0);
			} else if (ret == -1) {
				perror("fork");
				/*
				 * Release the workers already started so they
				 * do not spin forever after we exit.
				 */
				ia64_mf();
				shared_flag->go = 1;
				exit(1);
			}
			pids[i] = ret;
		}
		sleep(1);
		for (i = 0; i < nprocs; i++)
			assign_to(pids[i], groups[i]);
		sleep(1);
		ia64_mf();
		shared_flag->go = 1;

		for (i = 0; i < nprocs; i++) {
			int status;
			waitpid(pids[i], &status, 0);
		}

		/* Detach so repeated rounds do not pile up mappings here. */
		if (shmdt(shared) == -1)
			perror("shmdt");
	}

	/* not reached */
	return 0;
}

[-- Attachment #3: Type: text/plain, Size: 206 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found] ` <20071214161834.034e6efe.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-14  8:17   ` KAMEZAWA Hiroyuki
       [not found]     ` <20071214171759.59f7ba57.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  2007-12-14  9:48   ` Ingo Molnar
  1 sibling, 1 reply; 25+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-14  8:17 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org, Andrew Morton,
	mingo-X9Un+BFzKDI, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

Tested again, and got NULL access and panic.

This is my guess from the stack dump. (The raw stack dump is attached below.)
==

/*
 * Pick the next task from the CFS runqueue hierarchy rooted at rq->cfs.
 * Returns NULL when the top-level cfs_rq has no running entities; otherwise
 * descends one level per iteration for as long as group_cfs_rq() yields a
 * non-NULL cfs_rq for the picked entity, and returns task_of() of the last
 * entity picked.
 * NOTE(review): the result of pick_next_entity() is used without a NULL
 * check; the arrows below mark where the report says this failed.
 */
static struct task_struct *pick_next_task_fair(struct rq *rq)
{
        struct cfs_rq *cfs_rq = &rq->cfs;
        struct sched_entity *se;

        if (unlikely(!cfs_rq->nr_running))
                return NULL;

        do {
                se = pick_next_entity(cfs_rq); <-- se was NULL.
                cfs_rq = group_cfs_rq(se);     <-- se->my_q causes SEGV
        } while (cfs_rq);

        return task_of(se);
}
===
It seems first_fair() returned NULL in
==
/*
 * Return the next entity to run on cfs_rq, or NULL.
 * When first_fair(cfs_rq) is non-zero, the entity comes from
 * __pick_next_entity() and is handed to set_next_entity() before being
 * returned; otherwise se keeps its initial NULL value and that is returned.
 */
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
        struct sched_entity *se = NULL;

        if (first_fair(cfs_rq)) { <------------------------------(*)
                se = __pick_next_entity(cfs_rq);
                set_next_entity(cfs_rq, se);
        }

        return se;
}
from register information.

Thanks,
-Kame


Stack dump is here.
==
Pid: 8197, CPU 6, comm:                  reg
psr : 00001210085a2010 ifs : 8000000000000206 ip  : [<a000000100067c01>]    Not tainted
ip is at pick_next_task_fair+0x81/0xe0
unat: 0000000000000000 pfs : 0000000000000206 rsc : 0000000000000003
rnat: 0000000000000000 bsps: 0000000000000000 pr  : 0000000000556959
ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c0270033f
csd : 0000000000000000 ssd : 0000000000000000
b0  : a000000100067c00 b6  : a000000100076a60 b7  : a00000010000ee50
 NaT consumption 2216203124768 [1]^M
Modules linked in: sunrpc binfmt_misc dm_mirror dm_mod fan sg thermal e1000 processor button conta
iner e100 eepro100 mii lpfc mptspi mptscsih mptbase ehci_hcd ohci_hcd uhci_hcd^M
^M
Pid: 8197, CPU 6, comm:                  reg^M
psr : 00001210085a2010 ifs : 8000000000000206 ip  : [<a000000100067c01>]    Not tainted^M
ip is at pick_next_task_fair+0x81/0xe0^M
unat: 0000000000000000 pfs : 0000000000000206 rsc : 0000000000000003^M
rnat: 0000000000000000 bsps: 0000000000000000 pr  : 0000000000556959^M
ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c0270033f^M
csd : 0000000000000000 ssd : 0000000000000000^M
b0  : a000000100067c00 b6  : a000000100076a60 b7  : a00000010000ee50^M
f6  : 000000000000000000000 f7  : 000000000000000000000^M
f8  : 1003e00000000a0000007 f9  : 1003e00000059499dd2c3^M
f10 : 1003ece02a62ae350c355 f11 : 1003e0000000000000037^M
r1  : a000000100d87a60 r2  : 000000df13538d0b r3  : 0000000000000060^M
r8  : 0000000000000000 r9  : e00001a004034b30 r10 : 0000000000000000^M
r11 : e00001a004034aa8 r12 : e00001a10397fe10 r13 : e00001a103970000^M
r14 : 00000000d594bde3 r15 : e00001a004034ab0 r16 : e00001a004034ab8^M
r17 : e00001a004034ac8 r18 : e00001a004038320 r19 : e00001a10426ff20^M
r20 : 0000000000000000 r21 : 0000000000000000 r22 : 0000000000000001^M
r23 : e00001a004034a91 r24 : e00001a004034a90 r25 : e00001a10426ff10^M
r26 : 0000000000000002 r27 : e00001a0040382f0 r28 : e00001a004038288^M
r29 : a0000001008a5468 r30 : a000000100076a60 r31 : a000000100b726e0^M
^M
Call Trace:^M
 [<a000000100013bc0>] show_stack+0x40/0xa0^M
                                sp=e00001a10397f860 bsp=e00001a103970f18^M
 [<a000000100014840>] show_regs+0x840/0x880^M
                                sp=e00001a10397fa30 bsp=e00001a103970ec0^M
 [<a000000100036fa0>] die+0x1a0/0x2a0^M
                                sp=e00001a10397fa30 bsp=e00001a103970e78^M
 [<a0000001000370f0>] die_if_kernel+0x50/0x80^M
                                sp=e00001a10397fa30 bsp=e00001a103970e48^M
 [<a000000100038260>] ia64_fault+0x1140/0x1260^M
                                sp=e00001a10397fa30 bsp=e00001a103970de8^M
 [<a00000010000ae20>] ia64_leave_kernel+0x0/0x270^M
                                sp=e00001a10397fc40 bsp=e00001a103970de8^M
 [<a000000100067c00>] pick_next_task_fair+0x80/0xe0^M
                                sp=e00001a10397fe10 bsp=e00001a103970db8^M
 [<a0000001006f6a60>] schedule+0x8e0/0x1280^M
                                sp=e00001a10397fe10 bsp=e00001a103970d08^M
 [<a000000100074e20>] sys_sched_yield+0xe0/0x100^M
                                sp=e00001a10397fe30 bsp=e00001a103970ca8^M
 [<a00000010000aca0>] ia64_ret_from_syscall+0x0/0x20^M
                                sp=e00001a10397fe30 bsp=e00001a103970ca8^M
 [<a000000000010720>] __kernel_syscall_via_break+0x0/0x20^M
                                sp=e00001a103980000 bsp=e00001a103970ca8^M

Disassemble.
==
a000000100067b80 <pick_next_task_fair>:
a000000100067b80:       18 10 19 08 80 05       [MMB]       alloc r34=ar.pfs,6,4,0
a000000100067b86:       20 80 83 00 42 00                   adds r2=112,r32
a000000100067b8c:       00 00 00 20                         nop.b 0x0
a000000100067b90:       09 20 81 41 00 21       [MMI]       adds r36=96,r32
a000000100067b96:       00 00 00 02 00 20                   nop.m 0x0
a000000100067b9c:       04 00 c4 00                         mov r33=b0;;
a000000100067ba0:       0b 70 00 04 18 10       [MMI]       ld8 r14=[r2];;
a000000100067ba6:       70 00 38 0c 72 00                   cmp.eq p7,p6=0,r14
a000000100067bac:       00 00 04 00                         nop.i 0x0;;
a000000100067bb0:       10 00 00 00 01 c0       [MIB]       nop.m 0x0
a000000100067bb6:       81 00 00 00 c2 03             (p07) mov r8=r0
a000000100067bbc:       80 00 00 41                   (p07) br.cond.spnt.few a000000100067c30 <pick_next_task_fair+0xb
0>
a000000100067bc0:       09 48 c0 48 00 21       [MMI]       adds r9=48,r36
a000000100067bc6:       00 00 00 02 00 00                   nop.m 0x0
a000000100067bcc:       04 00 00 84                         mov r32=r0;;
a000000100067bd0:       09 00 00 00 01 00       [MMI]       nop.m 0x0
a000000100067bd6:       80 00 24 30 20 00                   ld8 r8=[r9]
a000000100067bdc:       00 00 04 00                         nop.i 0x0;;
a000000100067be0:       03 00 00 00 01 00       [MII]       nop.m 0x0
a000000100067be6:       b0 00 20 14 72 05                   cmp.eq p11,p10=0,r8;;
a000000100067bec:       04 47 fc 8c                   (p10) adds r32=-16,r8;;
a000000100067bf0:       51 29 01 40 00 21       [MIB] (p10) mov r37=r32
a000000100067bf6:       00 00 00 02 00 05                   nop.i 0x0
a000000100067bfc:       58 fe ff 5a                   (p10) br.call.dptk.many b0=a000000100067a40 <set_next_entity>;;
a000000100067c00:       0b 18 80 41 00 21       [MMI]       adds r3=96,r32;;
a000000100067c06:       40 02 0c 30 20 00                   ld8 r36=[r3]     <----------panic.
a000000100067c0c:       00 00 04 00                         nop.i 0x0;;
a000000100067c10:       10 00 00 00 01 00       [MIB]       nop.m 0x0
a000000100067c16:       90 00 90 10 72 04                   cmp.eq p9,p8=0,r36
a000000100067c1c:       b0 ff ff 4a                   (p08) br.cond.dptk.few a000000100067bc0 <pick_next_task_fair+0x4

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found] ` <20071214161834.034e6efe.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  2007-12-14  8:17   ` KAMEZAWA Hiroyuki
@ 2007-12-14  9:48   ` Ingo Molnar
  1 sibling, 0 replies; 25+ messages in thread
From: Ingo Molnar @ 2007-12-14  9:48 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Dhaval Giani, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	Dmitry Adamushko,
	containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org, Andrew Morton,
	Peter Zijlstra


(Cc:-ed other folks as well)

* KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> wrote:

> Hi,
> 
> While I was testing 2.6.24-rc5-mm1's fair group scheduler (with cgroup),
> the system hangs. please confirm. it's reproducible on my box.
> 
> My test program is attached.
> 
> What happens:
>   the system hangs. (panic ?)
> 
> Environ:
>   ia64/NUMA 8CPU systems. 4 cpus per node.
> 
> How to reproduce:
>   Compile attached one.
>   # gcc -o reg reg.c
>   Create group as following
>   # mount -t cgroup none /opt/cgroup -o cpu
>   # mkdir /opt/cgroup/group_1
>   # mkdir /opt/cgroup/group_2
>   
>   And run attached program
>   # ./reg 8 8
> 
> What 'reg' does;  
>   usage : reg A B C...
>   This program forks child process and assign 
>      A of processes to group_1
>      B of processes to group_2
>      C of processes to group_3
>   kick and waitpid all and repeat.
> 
> Thanks,
> -Kame

> #include <stdlib.h>
> #include <stdio.h>
> #include <strings.h>
> #include <sys/types.h>
> #include <unistd.h>
> #include <sched.h>
> #include <asm/intrinsics.h>
> #include <sys/ipc.h>
> #include <sys/shm.h>
> #include <errno.h>
> #include <sys/times.h>
> 
> static char *shared;
> #define MAX_PROCS 32
> #define SHMSIZE	(16384)
> 
> struct start_stop {
> 	int go;
> };
> 
> /* Assign PID to a group....
>  * work as # echo PID > /opt/cgroup/group_%d/tasks
>  */
> void assign_to(int pid, int group)
> {
> 	FILE *fp;
> 	char buf[32];
> 
> 	memset(buf, 0, sizeof(buf));
> 	sprintf(buf,"/opt/cgroup/group_%d/tasks",group);
> 	fp = fopen(buf,"w");
> 	if (fp == NULL) {
> 		perror("fopen");
> 		fprintf(stderr, "failed : fopen");
> 		exit(0);
> 	}
> 	fprintf(fp, "%d", pid);
> 	fclose(fp);
> 	printf("%d to %s\n", pid, buf);
> }
> 
> /*
>  * spin wait and go into small loop.
>  * # of loops are counted as score.
>  * This process's utime is recorded in times[id]
>  */
> int worker(int id)
> {
> 	struct start_stop *shared_flag;
> 
> 	shared_flag = (struct start_stop*)shared;
> 	do  {
> 		sched_yield();
> 		ia64_mf();
> 	} while (!shared_flag->go);
> }
> 
> /*
>  * If you want to assign..
>  * 2 proces to group 1, 3 procs to group 2 -># ./a.out 2 3
>  * 3 proces to group 1, 3 procs to group 2, 3 procs to group 3
>  * -># ./a.out 3 3 3
>  * Total 32 procs are supported.
>  */
> 
> int main(int argc, char *argv[])
> {
> 	int nprocs;
> 	int shmid, i;
> 	struct start_stop *shared_flag;
> 	int pids[MAX_PROCS];
> 	int groups[MAX_PROCS];
> 
> 	memset(pids, 0 , sizeof(pids));
> 	memset(groups, 0 , sizeof(groups));
> 
> again:
> 	for (nprocs = 0, i = 1; i < argc; i++) {
> 		int num = atoi(argv[i]);
> 		int j;
> 		for (j = 0; j < num; j++) {
> 			groups[nprocs + j] = i;
> 		}
> 		nprocs += num;
> 	}
> 	
> 	shmid = shmget(IPC_PRIVATE, SHMSIZE, IPC_CREAT | 0666);
> 	if (shmid == -1) {
> 		perror("shmget");
> 		exit(1);
> 	}
> 
> 	shared = shmat(shmid, NULL, 0);
> 	shared_flag = (struct start_stop *)shared;
> 
> 	memset(shared, 0, SHMSIZE);
> 	shmctl(shmid, IPC_RMID, 0);
> 	
> 	for (i = 0; i < nprocs; i++) {
> 		int ret;
> 		ret = fork();
> 		if (ret == 0) {
> 			worker(i);
> 			exit(0);
> 		} else if (ret == -1) {
> 			perror("fork");
> 			exit(0);
> 		}
> 		pids[i] = ret;
> 	}
> 	sleep(1);
> 	for (i = 0; i < nprocs; i++)
> 		assign_to(pids[i], groups[i]);
> 	sleep(1);
> 	ia64_mf();
> 	shared_flag->go = 1;
> 	
> 	for (i = 0; i < nprocs; i++) {
> 		int status;
> 		waitpid(pids[i], &status, 0);
> 	} 
> 	goto again;
> 
> 	return 0;
> }

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]     ` <20071214171759.59f7ba57.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-14  9:49       ` Ingo Molnar
       [not found]         ` <20071214094909.GG11266-X9Un+BFzKDI@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Ingo Molnar @ 2007-12-14  9:49 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Dhaval Giani, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	Dmitry Adamushko,
	containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org, Andrew Morton,
	Peter Zijlstra


(Cc:-ed other folks as well)

* KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> wrote:

> Tested again, and got NULL access and panic.
> 
> This is my guess from stack dump. (raw stack dump is attached below.)
> ==
> 
> static struct task_struct *pick_next_task_fair(struct rq *rq)
> {
>         struct cfs_rq *cfs_rq = &rq->cfs;
>         struct sched_entity *se;
> 
>         if (unlikely(!cfs_rq->nr_running))
>                 return NULL;
> 
>         do {
>                 se = pick_next_entity(cfs_rq); <-- se was NULL.
>                 cfs_rq = group_cfs_rq(se);     <-- se->my_q causes SEGV
>         } while (cfs_rq);
> 
>         return task_of(se);
> }
> ===
> Seems first_fair() was NULL in
> ==
> static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
> {
>         struct sched_entity *se = NULL;
> 
>         if (first_fair(cfs_rq)) { <------------------------------(*)
>                 se = __pick_next_entity(cfs_rq);
>                 set_next_entity(cfs_rq, se);
>         }
> 
>         return se;
> }
> ==
> from register information.
> 
> Thanks,
> -Kame
> 
> 
> Stack dump is here.
> ==
> Pid: 8197, CPU 6, comm:                  reg
> psr : 00001210085a2010 ifs : 8000000000000206 ip  : [<a000000100067c01>]    Not tainted
> ip is at pick_next_task_fair+0x81/0xe0
> unat: 0000000000000000 pfs : 0000000000000206 rsc : 0000000000000003
> rnat: 0000000000000000 bsps: 0000000000000000 pr  : 0000000000556959
> ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c0270033f
> csd : 0000000000000000 ssd : 0000000000000000
> b0  : a000000100067c00 b6  : a000000100076a60 b7  : a00000010000ee50
>  NaT consumption 2216203124768 [1]^M
> Modules linked in: sunrpc binfmt_misc dm_mirror dm_mod fan sg thermal e1000 processor button conta
> iner e100 eepro100 mii lpfc mptspi mptscsih mptbase ehci_hcd ohci_hcd uhci_hcd^M
> ^M
> Pid: 8197, CPU 6, comm:                  reg^M
> psr : 00001210085a2010 ifs : 8000000000000206 ip  : [<a000000100067c01>]    Not tainted^M
> ip is at pick_next_task_fair+0x81/0xe0^M
> unat: 0000000000000000 pfs : 0000000000000206 rsc : 0000000000000003^M
> rnat: 0000000000000000 bsps: 0000000000000000 pr  : 0000000000556959^M
> ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c0270033f^M
> csd : 0000000000000000 ssd : 0000000000000000^M
> b0  : a000000100067c00 b6  : a000000100076a60 b7  : a00000010000ee50^M
> f6  : 000000000000000000000 f7  : 000000000000000000000^M
> f8  : 1003e00000000a0000007 f9  : 1003e00000059499dd2c3^M
> f10 : 1003ece02a62ae350c355 f11 : 1003e0000000000000037^M
> r1  : a000000100d87a60 r2  : 000000df13538d0b r3  : 0000000000000060^M
> r8  : 0000000000000000 r9  : e00001a004034b30 r10 : 0000000000000000^M
> r11 : e00001a004034aa8 r12 : e00001a10397fe10 r13 : e00001a103970000^M
> r14 : 00000000d594bde3 r15 : e00001a004034ab0 r16 : e00001a004034ab8^M
> r17 : e00001a004034ac8 r18 : e00001a004038320 r19 : e00001a10426ff20^M
> r20 : 0000000000000000 r21 : 0000000000000000 r22 : 0000000000000001^M
> r23 : e00001a004034a91 r24 : e00001a004034a90 r25 : e00001a10426ff10^M
> r26 : 0000000000000002 r27 : e00001a0040382f0 r28 : e00001a004038288^M
> r29 : a0000001008a5468 r30 : a000000100076a60 r31 : a000000100b726e0^M
> ^M
> Call Trace:^M
>  [<a000000100013bc0>] show_stack+0x40/0xa0^M
>                                 sp=e00001a10397f860 bsp=e00001a103970f18^M
>  [<a000000100014840>] show_regs+0x840/0x880^M
>                                 sp=e00001a10397fa30 bsp=e00001a103970ec0^M
>  [<a000000100036fa0>] die+0x1a0/0x2a0^M
>                                 sp=e00001a10397fa30 bsp=e00001a103970e78^M
>  [<a0000001000370f0>] die_if_kernel+0x50/0x80^M
>                                 sp=e00001a10397fa30 bsp=e00001a103970e48^M
>  [<a000000100038260>] ia64_fault+0x1140/0x1260^M
>                                 sp=e00001a10397fa30 bsp=e00001a103970de8^M
>  [<a00000010000ae20>] ia64_leave_kernel+0x0/0x270^M
>                                 sp=e00001a10397fc40 bsp=e00001a103970de8^M
>  [<a000000100067c00>] pick_next_task_fair+0x80/0xe0^M
>                                 sp=e00001a10397fe10 bsp=e00001a103970db8^M
>  [<a0000001006f6a60>] schedule+0x8e0/0x1280^M
>                                 sp=e00001a10397fe10 bsp=e00001a103970d08^M
>  [<a000000100074e20>] sys_sched_yield+0xe0/0x100^M
>                                 sp=e00001a10397fe30 bsp=e00001a103970ca8^M
>  [<a00000010000aca0>] ia64_ret_from_syscall+0x0/0x20^M
>                                 sp=e00001a10397fe30 bsp=e00001a103970ca8^M
>  [<a000000000010720>] __kernel_syscall_via_break+0x0/0x20^M
>                                 sp=e00001a103980000 bsp=e00001a103970ca8^M
> 
> Disassemble.
> ==
> a000000100067b80 <pick_next_task_fair>:
> a000000100067b80:       18 10 19 08 80 05       [MMB]       alloc r34=ar.pfs,6,4,0
> a000000100067b86:       20 80 83 00 42 00                   adds r2=112,r32
> a000000100067b8c:       00 00 00 20                         nop.b 0x0
> a000000100067b90:       09 20 81 41 00 21       [MMI]       adds r36=96,r32
> a000000100067b96:       00 00 00 02 00 20                   nop.m 0x0
> a000000100067b9c:       04 00 c4 00                         mov r33=b0;;
> a000000100067ba0:       0b 70 00 04 18 10       [MMI]       ld8 r14=[r2];;
> a000000100067ba6:       70 00 38 0c 72 00                   cmp.eq p7,p6=0,r14
> a000000100067bac:       00 00 04 00                         nop.i 0x0;;
> a000000100067bb0:       10 00 00 00 01 c0       [MIB]       nop.m 0x0
> a000000100067bb6:       81 00 00 00 c2 03             (p07) mov r8=r0
> a000000100067bbc:       80 00 00 41                   (p07) br.cond.spnt.few a000000100067c30 <pick_next_task_fair+0xb
> 0>
> a000000100067bc0:       09 48 c0 48 00 21       [MMI]       adds r9=48,r36
> a000000100067bc6:       00 00 00 02 00 00                   nop.m 0x0
> a000000100067bcc:       04 00 00 84                         mov r32=r0;;
> a000000100067bd0:       09 00 00 00 01 00       [MMI]       nop.m 0x0
> a000000100067bd6:       80 00 24 30 20 00                   ld8 r8=[r9]
> a000000100067bdc:       00 00 04 00                         nop.i 0x0;;
> a000000100067be0:       03 00 00 00 01 00       [MII]       nop.m 0x0
> a000000100067be6:       b0 00 20 14 72 05                   cmp.eq p11,p10=0,r8;;
> a000000100067bec:       04 47 fc 8c                   (p10) adds r32=-16,r8;;
> a000000100067bf0:       51 29 01 40 00 21       [MIB] (p10) mov r37=r32
> a000000100067bf6:       00 00 00 02 00 05                   nop.i 0x0
> a000000100067bfc:       58 fe ff 5a                   (p10) br.call.dptk.many b0=a000000100067a40 <set_next_entity>;;
> a000000100067c00:       0b 18 80 41 00 21       [MMI]       adds r3=96,r32;;
> a000000100067c06:       40 02 0c 30 20 00                   ld8 r36=[r3]     <----------panic.
> a000000100067c0c:       00 00 04 00                         nop.i 0x0;;
> a000000100067c10:       10 00 00 00 01 00       [MIB]       nop.m 0x0
> a000000100067c16:       90 00 90 10 72 04                   cmp.eq p9,p8=0,r36
> a000000100067c1c:       b0 ff ff 4a                   (p08) br.cond.dptk.few a000000100067bc0 <pick_next_task_fair+0x4

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]         ` <20071214094909.GG11266-X9Un+BFzKDI@public.gmane.org>
@ 2007-12-14 10:58           ` KAMEZAWA Hiroyuki
       [not found]             ` <20071214195837.0d3511db.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
       [not found]             ` <b647ffbd0712140447kfba5945ybde40f18653dd164-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 2 replies; 25+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-14 10:58 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Dhaval Giani, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	Dmitry Adamushko,
	containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org, Andrew Morton,
	Peter Zijlstra

Here is a much easier test.
(I'm sorry I'll be absent tomorrow.)

the number of cpus is 8. ia64/NUMA.

The hang occurs when the number of tasks is not smaller than the number of
available cpus. Could this be a hint?

==
[root@rhel51GA testpro]# cat yield.c
#include <sched.h>

/* Yield the CPU in an endless loop; the process never exits on its own. */
int main()
{
        for (;;) {
                sched_yield();
        }
}


[root@rhel51GA testpro]# cat batch-test.sh
#!/bin/bash -x
# Reproducer driver: keeps moving two ./yield processes back and forth
# between two cpu cgroups.

# Mount the cgroup filesystem with the cpu subsystem and create two groups.
mount -t cgroup none /opt/cgroup -o cpu
mkdir /opt/cgroup/group_1
mkdir /opt/cgroup/group_2

# Start two yield loops in the background and remember their PIDs.
./yield &
PIDA=$!
./yield &
PIDB=$!

# Forever: write both PIDs to group_1's tasks file, then to group_2's.
while true; do
        echo $PIDA > /opt/cgroup/group_1/tasks
        echo $PIDB > /opt/cgroup/group_1/tasks
        echo $PIDA > /opt/cgroup/group_2/tasks;
        echo $PIDB > /opt/cgroup/group_2/tasks
done

[root@rhel51GA testpro]#./batch-test.sh
no hang.

[root@rhel51GA testpro]#taskset 0f ./batch-test.sh
no hang

[root@rhel51GA testpro]#taskset 03 ./batch-test.sh
hang.
Pid: 8132, CPU 0, comm:                yield
psr : 00001210085a2010 ifs : 8000000000000206 ip  : [<a000000100067c01>]    Not tainted
ip is at pick_next_task_fair+0x81/0xe0
unat: 0000000000000000 pfs : 0000000000000b1d rsc : 0000000000000003
rnat: 0000000000000000 bsps: 0000000000000000 pr  : 0000000000566959
ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c0270033f
csd : 0000000000000000 ssd : 0000000000000000
b0  : a0000001006f6ac0 b6  : a000000100076a60 b7  : a000000100067b80
f6  : 000000000000000000000 f7  : 000000000000000000000
f8  : 1003e00000000a0000007 f9  : 1003e0000004633b23e65
f10 : 1003ee04f68ea89dfb4c3 f11 : 1003e000000000000002b
r1  : a000000100d87a60 r2  : e0000000011082f0 r3  : 0000000000000060
r8  : 0000000000000000 r9  : e000000001108310 r10 : e000004080032018
r11 : 00000000f86ccc70 r12 : e00000408394fe10 r13 : e000004083940000
r14 : 0000000000000001 r15 : 0000000000000064 r16 : e0000000011089f0
r17 : ffffffffffffffff r18 : e000000001108360 r19 : 0000000000000000
r20 : e00000408003ef10 r21 : 0000000001e9555b r22 : 000000af762794d4
r23 : 00000015e1abc70b r24 : ffffffffffffe463 r25 : e00000408003ef10
r26 : 0000000000000002 r27 : e0000000011082f0 r28 : e000000001108288
r29 : a0000001008a5468 r30 : a000000100076a60 r31 : a000000100b726e0

Call Trace:
 [<a000000100013bc0>] show_stack+0x40/0xa0
                                sp=e00000408394f860 bsp=e000004083940f18
 [<a000000100014840>] show_regs+0x840/0x880
                                sp=e00000408394fa30 bsp=e000004083940ec0
 [<a000000100036fa0>] die+0x1a0/0x2a0
                                sp=e00000408394fa30 bsp=e000004083940e78
 [<a0000001000370f0>] die_if_kernel+0x50/0x80
                                sp=e00000408394fa30 bsp=e000004083940e48
 [<a000000100038260>] ia64_fault+0x1140/0x1260
                                sp=e00000408394fa30 bsp=e000004083940de8
 [<a00000010000ae20>] ia64_leave_kernel+0x0/0x270
                                sp=e00000408394fc40 bsp=e000004083940de8
 [<a000000100067c00>] pick_next_task_fair+0x80/0xe0
                                sp=e00000408394fe10 bsp=e000004083940db8
 [<a0000001006f6ac0>] schedule+0x940/0x1280
                                sp=e00000408394fe10 bsp=e000004083940d08
 [<a000000100074e20>] sys_sched_yield+0xe0/0x100
                                sp=e00000408394fe30 bsp=e000004083940ca8
 [<a00000010000aca0>] ia64_ret_from_syscall+0x0/0x20
                                sp=e00000408394fe30 bsp=e000004083940ca8
 [<a000000000010720>] __kernel_syscall_via_break+0x0/0x20
                                sp=e000004083950000 bsp=e000004083940ca8
Thanks,
-Kame

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]             ` <20071214195837.0d3511db.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-14 11:48               ` Dhaval Giani
  2007-12-14 12:47               ` Dmitry Adamushko
  1 sibling, 0 replies; 25+ messages in thread
From: Dhaval Giani @ 2007-12-14 11:48 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Peter Zijlstra, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	Dmitry Adamushko,
	containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org, Ingo Molnar,
	Andrew Morton

On Fri, Dec 14, 2007 at 07:58:37PM +0900, KAMEZAWA Hiroyuki wrote:
> Here is much easier test.

Thanks for the test! Let me see if I can reproduce it here.

-- 
regards,
Dhaval

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]             ` <20071214195837.0d3511db.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  2007-12-14 11:48               ` Dhaval Giani
@ 2007-12-14 12:47               ` Dmitry Adamushko
       [not found]                 ` <20071214141528.GA6161-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  1 sibling, 1 reply; 25+ messages in thread
From: Dmitry Adamushko @ 2007-12-14 12:47 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Dhaval Giani, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org, Ingo Molnar,
	Andrew Morton, Peter Zijlstra

On 14/12/2007, KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> wrote:
> Here is much easier test.
> (I'm sorry I'll be absent tomorrow.)
>
> the number of cpus is 8. ia64/NUMA.
>
> The hang occurs when the number of tasks is not smaller than available cpus.
> Can be a hint ?
>
> [ ... ]
>
> [root@rhel51GA testpro]#./batech-test.sh
> no hang.
>
> [root@rhel51GA testpro]#taskset 0f ./batech-test.sh
> no hang
>
> [root@rhel51GA testpro]#taskset 03 ./batech-test.sh
> hang.

have you tried :

[root@rhel51GA testpro]#taskset 01 ./batech-test.sh

hang?

just to be sure SMP does matter here (most likely yes, I guess).

TIA,

>
> Thanks,
> -Kame
>

-- 
Best regards,
Dmitry Adamushko

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]             ` <b647ffbd0712140447kfba5945ybde40f18653dd164-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2007-12-14 12:50               ` kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
  2007-12-14 14:15               ` Dhaval Giani
  1 sibling, 0 replies; 25+ messages in thread
From: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A @ 2007-12-14 12:50 UTC (permalink / raw)
  To: Dmitry Adamushko
  Cc: Peter Zijlstra, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	containers-qjLDD68F18O7TbgM5vRIOg, Ingo Molnar, Andrew Morton,
	Dhaval Giani

-
>have you tried :
>
>[root@rhel51GA testpro]#taskset 01 ./batech-test.sh
>
yes

>hang?
>
no.

>just to be sure SMP does matter here (most likely yes, I guess).
>
maybe. As far as I tested, there was no hang if the number of cpus is 1.

Regards,
-Kame

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]             ` <b647ffbd0712140447kfba5945ybde40f18653dd164-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2007-12-14 12:50               ` kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
@ 2007-12-14 14:15               ` Dhaval Giani
  1 sibling, 0 replies; 25+ messages in thread
From: Dhaval Giani @ 2007-12-14 14:15 UTC (permalink / raw)
  To: Dmitry Adamushko
  Cc: Peter Zijlstra, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org, Ingo Molnar,
	Andrew Morton

On Fri, Dec 14, 2007 at 01:47:13PM +0100, Dmitry Adamushko wrote:
> On 14/12/2007, KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> wrote:
> > Here is much easier test.
> > (I'm sorry I'll be absent tomorrow.)
> >
> > the number of cpus is 8. ia64/NUMA.
> >
> > The hang occurs when the number of tasks is not smaller than available cpus.
> > Can be a hint ?
> >
> > [ ... ]
> >
> > [root@rhel51GA testpro]#./batech-test.sh
> > no hang.
> >
> > [root@rhel51GA testpro]#taskset 0f ./batech-test.sh
> > no hang
> >
> > [root@rhel51GA testpro]#taskset 03 ./batech-test.sh
> > hang.
> 
> have you tried :
> 
> [root@rhel51GA testpro]#taskset 01 ./batech-test.sh
> 
> hang?
> 
> just to be sure SMP does matter here (most likely yes, I guess).
> 

NUMA? I am not able to reproduce it here locally on an x86 8 CPU box.


-- 
regards,
Dhaval

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                 ` <20071214141528.GA6161-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2007-12-14 14:24                   ` kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
       [not found]                     ` <20442799.1197642268756.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A @ 2007-12-14 14:24 UTC (permalink / raw)
  To: Dhaval Giani
  Cc: Peter Zijlstra, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	Dmitry Adamushko, containers-qjLDD68F18O7TbgM5vRIOg, Ingo Molnar,
	Andrew Morton

>> just to be sure SMP does matter here (most likely yes, I guess).
>> 
>
>NUMA? I am not able to reproduce it here locally on an x86 8 CPU box.
>
yes. I used NUMA. 2 Nodes/4CPU x 2

Hmm..

Thanks,
-Kame

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                     ` <20442799.1197642268756.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-14 15:36                       ` Dhaval Giani
       [not found]                         ` <20071214153607.GB23670-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Dhaval Giani @ 2007-12-14 15:36 UTC (permalink / raw)
  To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
  Cc: Peter Zijlstra, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	Dmitry Adamushko, containers-qjLDD68F18O7TbgM5vRIOg, Ingo Molnar,
	Andrew Morton

On Fri, Dec 14, 2007 at 11:24:28PM +0900, kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org wrote:
> >> just to be sure SMP does matter here (most likely yes, I guess).
> >> 
> >
> >NUMA? I am not able to reproduce it here locally on an x86 8 CPU box.
> >
> yes. I used NUMA. 2 Nodes/4CPU x 2
> 

OK, I got hold of an IA64 box, non numa and have managed to reproduce
it.

> Hmm..
> 
> Thanks,
> -Kame

-- 
regards,
Dhaval

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                         ` <20071214153607.GB23670-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2007-12-14 15:38                           ` Dhaval Giani
       [not found]                             ` <20071214153823.GC23670-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Dhaval Giani @ 2007-12-14 15:38 UTC (permalink / raw)
  To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
  Cc: Peter Zijlstra, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	Dmitry Adamushko, containers-qjLDD68F18O7TbgM5vRIOg, Ingo Molnar,
	Andrew Morton

On Fri, Dec 14, 2007 at 09:06:07PM +0530, Dhaval Giani wrote:
> On Fri, Dec 14, 2007 at 11:24:28PM +0900, kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org wrote:
> > >> just to be sure SMP does matter here (most likely yes, I guess).
> > >> 
> > >
> > >NUMA? I am not able to reproduce it here locally on an x86 8 CPU box.
> > >
> > yes. I used NUMA. 2 Nodes/4CPU x 2
> > 
> 
> OK, I got hold of an IA64 box, non numa and have managed to reproduce
> it.
> 

Actually no, it's another bug. Thanks for the program!

reg[3330]: NaT consumption 2216203124768 [1]
Modules linked in: ipv6 button binfmt_misc nls_iso8859_1 loop dm_mod tg3
ext3 jbd fan thermal processor sg mptspi mptscsih mptbase
scsi_transport_spi via82cxxx sd_mod scsi_mod ide_disk ide_core

Pid: 3330, CPU 3, comm:                  reg
psr : 00001210085a2010 ifs : 8000000000000308 ip  : [<a0000001002e0481>]
Not tainted
ip is at rb_erase+0x301/0x7e0
unat: 0000000000000000 pfs : 0000000000000308 rsc : 0000000000000003
rnat: 0000000000000000 bsps: 0000000000000000 pr  : a5565666a9556959
ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c0270033f
csd : 0000000000000000 ssd : 0000000000000000
b0  : a000000100076290 b6  : a000000100086b20 b7  : a000000100076360
f6  : 1003e0000000000000d34 f7  : 1003e000000000000000a
f8  : 1003e0000000000000000 f9  : 1003e0000000000000152
f10 : 1003e0000000000000004 f11 : 0fff2fffffffff0000000
r1  : a000000100c92030 r2  : e000000244bd0068 r3  : e000000245882000
r8  : e000000245882000 r9  : e000000241e6eda0 r10 : 0000000000000001
r11 : e0000002458f0070 r12 : e0000002458a7d80 r13 : e0000002458a0000
r14 : e000000244bd0060 r15 : e000000244bd0058 r16 : 0000000000000000
r17 : e000000245920d34 r18 : 0000000000000000 r19 : 0000000000000000
r20 : e000000245920c90 r21 : 0000000000000001 r22 : a000000100076360
r23 : a000000100a7f2f8 r24 : a000000100a7f2b0 r25 : e0000002458c0058
r26 : e000000004e05b10 r27 : 0000000000000001 r28 : 0000000000000000
r29 : a000000100a7f2e0 r30 : a000000100a7f2b0 r31 : e000000245920098

Call Trace:
 [<a000000100014a80>] show_stack+0x40/0xa0
                                sp=e0000002458a77d0 bsp=e0000002458a1310
 [<a000000100015380>] show_regs+0x840/0x880
                                sp=e0000002458a79a0 bsp=e0000002458a12b8
 [<a0000001000384a0>] die+0x1a0/0x2a0
                                sp=e0000002458a79a0 bsp=e0000002458a1270
 [<a0000001000385f0>] die_if_kernel+0x50/0x80
                                sp=e0000002458a79a0 bsp=e0000002458a1240
 [<a0000001005b1a80>] ia64_fault+0x1180/0x12a0
                                sp=e0000002458a79a0 bsp=e0000002458a11e0
 [<a00000010000b2a0>] ia64_leave_kernel+0x0/0x270
                                sp=e0000002458a7bb0 bsp=e0000002458a11e0
 [<a0000001002e0480>] rb_erase+0x300/0x7e0
                                sp=e0000002458a7d80 bsp=e0000002458a11a0
 [<a000000100076290>] __dequeue_entity+0x70/0xa0
                                sp=e0000002458a7d80 bsp=e0000002458a1170
 [<a000000100076300>] set_next_entity+0x40/0xa0
                                sp=e0000002458a7d80 bsp=e0000002458a1148
 [<a0000001000763a0>] set_curr_task_fair+0x40/0xa0
                                sp=e0000002458a7d80 bsp=e0000002458a1128
 [<a000000100078d90>] sched_move_task+0x2d0/0x340
                                sp=e0000002458a7d80 bsp=e0000002458a10e8
 [<a000000100078e20>] cpu_cgroup_attach+0x20/0x40
                                sp=e0000002458a7d90 bsp=e0000002458a10b0
 [<a0000001000e9370>] attach_task+0x9b0/0xac0
                                sp=e0000002458a7d90 bsp=e0000002458a1058
 [<a0000001000ed4e0>] cgroup_common_file_write+0x340/0x520
                                sp=e0000002458a7dc0 bsp=e0000002458a1010
 [<a0000001000eccd0>] cgroup_file_write+0xf0/0x300
                                sp=e0000002458a7dd0 bsp=e0000002458a0fc0
 [<a00000010017bbd0>] vfs_write+0x1d0/0x320
                                sp=e0000002458a7e20 bsp=e0000002458a0f70
 [<a00000010017c7f0>] sys_write+0x70/0xe0
                                sp=e0000002458a7e20 bsp=e0000002458a0ef8
 [<a00000010000b100>] ia64_ret_from_syscall+0x0/0x20
                                sp=e0000002458a7e30 bsp=e0000002458a0ef8
 [<a000000000010720>] __kernel_syscall_via_break+0x0/0x20
                                sp=e0000002458a8000 bsp=e0000002458a0ef8



-- 
regards,
Dhaval

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                             ` <20071214153823.GC23670-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2007-12-14 16:25                               ` Dmitry Adamushko
       [not found]                                 ` <b647ffbd0712140825h4f541be0xa7a7866e70b3af7a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Dmitry Adamushko @ 2007-12-14 16:25 UTC (permalink / raw)
  To: Dhaval Giani
  Cc: Peter Zijlstra, vatsa-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	containers-qjLDD68F18O7TbgM5vRIOg, Ingo Molnar, Andrew Morton

On 14/12/2007, Dhaval Giani <dhaval-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org> wrote:
> Actually no, its another bug. Thanks for the program!
>

Humm... this crash is very likely to be caused by the same bug. It
just reveals itself in a different place, but effectively the pattern
looks similar. Anyway, the rb-tree gets corrupted... and for both
cases, at the very least the 'current' must be within the tree.
I think, if you repeat your test a number of times, you'll likely get
the very same crash as was reported by Kame.

ia64 does define __ARCH_WANT_UNLOCKED_CTXSW (I checked against
2.6.23.1 that I have at hand)

x86 -- not (it's not reproducible there, right?)

so for ia64 task_running() makes use of 'p->oncpu' to determine
whether a given task is currently running
(as opposed to 'rq->curr == p' otherwise)...

But at first glance, it looks like there shouldn't be situations
leading to some sort of de-synchronization in determining the real
'current'.

Will look at it closer.


>
> --
> regards,
> Dhaval
>

-- 
Best regards,
Dmitry Adamushko

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                 ` <b647ffbd0712140825h4f541be0xa7a7866e70b3af7a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2007-12-14 19:51                                   ` Dmitry Adamushko
       [not found]                                     ` <b647ffbd0712141151k697d9bbemda9a7e90515e4400-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Dmitry Adamushko @ 2007-12-14 19:51 UTC (permalink / raw)
  To: Ingo Molnar, Srivatsa Vaddagiri
  Cc: Peter Zijlstra, Steven Rostedt, containers-qjLDD68F18O7TbgM5vRIOg,
	Andrew Morton, Dhaval Giani

> [ ... ]
>
>  [<a0000001002e0480>] rb_erase+0x300/0x7e0
>  [<a000000100076290>] __dequeue_entity+0x70/0xa0
>  [<a000000100076300>] set_next_entity+0x40/0xa0
>  [<a0000001000763a0>] set_curr_task_fair+0x40/0xa0
>  [<a000000100078d90>] sched_move_task+0x2d0/0x340
>  [<a000000100078e20>] cpu_cgroup_attach+0x20/0x40
>
> [ ... ]

argh... it's a consequence of the 'current is not kept within the tree' indeed.

When sched_move_task() is called for the 'current' (running on another CPU),
we get the following:

...
        running = task_running(rq, tsk);
        on_rq = tsk->se.on_rq;

        if (on_rq) {
                dequeue_task(rq, tsk, 0);
                if (unlikely(running))
                        tsk->sched_class->put_prev_task(rq, tsk);
        }

[1]  tsk->sched_class->put_prev_task() actually _inserts_ 'tsk' back
into the cfs_rq of its _old_ group :

        set_task_cfs_rq(tsk, task_cpu(tsk));

[2] now tsk->se.cfs_rq gets changed

        if (on_rq) {
                if (unlikely(running))
                        tsk->sched_class->set_curr_task(rq);

[3] and now,  tsk->sched_class->set_curr_task(rq) _removes_ the
'current' from the tree... but this tree belongs to the _new_ group
(the task is still within the 'old_group->cfs_rq->rb_tree') ---> oops!

                enqueue_task(rq, tsk, 0);
        }

Anyway, I have to admit that this problem is a consequence of the
special-case treatment for the 'current' by
'dequeue/enqueue_task()'... it makes the interface less transparent
indeed.

/me thinking on how to get it fixed (e.g. set_task_cfs_rq() might take
care of it) or just get this special-case issue removed (have to check
whether we lose anything in this case)... sigh.


-- 
Best regards,
Dmitry Adamushko

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                     ` <b647ffbd0712141151k697d9bbemda9a7e90515e4400-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2007-12-14 21:33                                       ` Steven Rostedt
       [not found]                                         ` <Pine.LNX.4.58.0712141614340.22005-f9ZlEuEWxVcI6MkJdU+c8EEOCMrvLtNR@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Steven Rostedt @ 2007-12-14 21:33 UTC (permalink / raw)
  To: Dmitry Adamushko
  Cc: Peter Zijlstra, Srivatsa Vaddagiri,
	containers-qjLDD68F18O7TbgM5vRIOg, Ingo Molnar, Andrew Morton,
	Dhaval Giani


On Fri, 14 Dec 2007, Dmitry Adamushko wrote:

>
> argh... it's a consequence of the 'current is not kept within the tree" indeed.
>

Thanks Dmitry for tracking this down. Although I'm still not convinced we
hit the same bug. But I'm going to go ahead and release 2.6.24-rc5-rt1
anyway. When you have a fix, please CC me and I'll add it to -rt2.

Note: I've added a bunch of logdev (see
http://rostedt.homelinux.com/logdev/README) and I kicked off the hackbench
again. I'll let it run overnight, and if it hits the bug, it will give me
a lot more output to let me know what actually happened.

Thanks,

-- Steve

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                         ` <Pine.LNX.4.58.0712141614340.22005-f9ZlEuEWxVcI6MkJdU+c8EEOCMrvLtNR@public.gmane.org>
@ 2007-12-15 10:22                                           ` Dmitry Adamushko
       [not found]                                             ` <b647ffbd0712150222p30cac9f9i772c2a2c4e05a4a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Dmitry Adamushko @ 2007-12-15 10:22 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Peter Zijlstra, Srivatsa Vaddagiri,
	containers-qjLDD68F18O7TbgM5vRIOg, Ingo Molnar, Andrew Morton,
	Dhaval Giani

On 14/12/2007, Steven Rostedt <rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org> wrote:
>
> On Fri, 14 Dec 2007, Dmitry Adamushko wrote:
>
> >
> > argh... it's a consequence of the 'current is not kept within the tree" indeed.
> >
>
> Thanks Dmitry for tracking this down.

My analysis was flawed (hmm... me was under control of Belgium beer :-)

The task is not on the runqueue (p->on_rq == 0) at the moment when
put_prev_task_fair() and set_curr_task_fair() get their turn in
sched_move_task()... so dequeue/enqueue_entity() are not triggered,
that's good.

so back to the square #0.


> Thanks,
>
> -- Steve

-- 
Best regards,
Dmitry Adamushko

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                             ` <b647ffbd0712150222p30cac9f9i772c2a2c4e05a4a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2007-12-15 10:50                                               ` Dhaval Giani
       [not found]                                                 ` <20071215105036.GB26325-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2007-12-15 23:44                                               ` Dmitry Adamushko
  1 sibling, 1 reply; 25+ messages in thread
From: Dhaval Giani @ 2007-12-15 10:50 UTC (permalink / raw)
  To: Dmitry Adamushko
  Cc: Peter Zijlstra, Srivatsa Vaddagiri, Steven Rostedt,
	containers-qjLDD68F18O7TbgM5vRIOg, Ingo Molnar, Andrew Morton

On Sat, Dec 15, 2007 at 11:22:08AM +0100, Dmitry Adamushko wrote:
> On 14/12/2007, Steven Rostedt <rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org> wrote:
> >
> > On Fri, 14 Dec 2007, Dmitry Adamushko wrote:
> >
> > >
> > > argh... it's a consequence of the 'current is not kept within the tree" indeed.
> > >
> >
> > Thanks Dmitry for tracking this down.
> 
> My analysis was flawed (hmm... me was under control of Belgium beer :-)
> 
> The task in not on the runqueue (p->on_rq == 0) at the moment when
> put_prev_task_fair() and set_curr_task_fair() get its turn in
> sched_move_task()... so dequeue/enqueue_entity() are not triggered,
> that's good.
> 

Again, I am probably missing something, but if on_rq == 0, then how is
set_curr_task_fair() getting called?

-- 
regards,
Dhaval

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                                 ` <20071215105036.GB26325-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2007-12-15 11:15                                                   ` Dmitry Adamushko
  0 siblings, 0 replies; 25+ messages in thread
From: Dmitry Adamushko @ 2007-12-15 11:15 UTC (permalink / raw)
  To: Dhaval Giani
  Cc: Peter Zijlstra, Srivatsa Vaddagiri, Steven Rostedt,
	containers-qjLDD68F18O7TbgM5vRIOg, Ingo Molnar, Andrew Morton

On 15/12/2007, Dhaval Giani <dhaval-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org> wrote:
> On Sat, Dec 15, 2007 at 11:22:08AM +0100, Dmitry Adamushko wrote:
> > On 14/12/2007, Steven Rostedt <rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org> wrote:
> > >
> > > On Fri, 14 Dec 2007, Dmitry Adamushko wrote:
> > >
> > > >
> > > > argh... it's a consequence of the 'current is not kept within the tree" indeed.
> > > >
> > >
> > > Thanks Dmitry for tracking this down.
> >
> > My analysis was flawed (hmm... me was under control of Belgium beer :-)
> >
> > The task in not on the runqueue (p->on_rq == 0) at the moment when
> > put_prev_task_fair() and set_curr_task_fair() get its turn in
> > sched_move_task()... so dequeue/enqueue_entity() are not triggered,
> > that's good.
> >
>
> Again, I am probably missing something, but if on_rq == 0, then how is
> set_curr_task_fair() getting called?
>

...
        running = task_running(rq, tsk);
        on_rq = tsk->se.on_rq;

// let's say on_rq == 1 , i.e. the task is on the runqueue

        if (on_rq) {
                dequeue_task(rq, tsk, 0);

// now tsk->se.on_rq becomes 0

                if (unlikely(running))
                        tsk->sched_class->put_prev_task(rq, tsk);

// put_prev_task() --> put_prev_entity() checks for 'tsk->se.on_rq' to
determine whether __enqueue_entity() must be done ---> and it's 0 in
our case.

[ it can be non-zero for the following path : schedule() -->
put_prev_task(..., prev) when deactivate_task(..., prev) was not
previously called in schedule(), i.e. 'prev' was preempted ]

tsk->se.on_rq will become 1 only after enqueue_task(). As a result,
tsk->se.on_rq is still 0 when set_curr_task() is executed.

does it make sense now?


> --
> regards,
> Dhaval
>


-- 
Best regards,
Dmitry Adamushko

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                             ` <b647ffbd0712150222p30cac9f9i772c2a2c4e05a4a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2007-12-15 10:50                                               ` Dhaval Giani
@ 2007-12-15 23:44                                               ` Dmitry Adamushko
       [not found]                                                 ` <b647ffbd0712151544n2dfad101r2d306d393e8550ff-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 25+ messages in thread
From: Dmitry Adamushko @ 2007-12-15 23:44 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Dhaval Giani, Srivatsa Vaddagiri, Steven Rostedt,
	containers-qjLDD68F18O7TbgM5vRIOg, Andrew Morton, Peter Zijlstra

On 15/12/2007, Dmitry Adamushko <dmitry.adamushko-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>
> My analysis was flawed (hmm... me was under control of Belgium beer :-)
>

ok, I've got another one (just in case... well, this late hour to be
blamed now :-/)

according to Dhaval, we have a crash on ia64 (it's also the arch for
the original report) and it's not reproducible on an otherwise similar
(wrt. # of cpus) x86.

(1) The difference that comes first to mind is that ia64 makes use of
__ARCH_WANT_UNLOCKED_CTXSW

dimm@earth:~/storage/kernel/linux-2.6$ grep -rn
__ARCH_WANT_UNLOCKED_CTXSW include/
include/linux/sched.h:947:#ifdef __ARCH_WANT_UNLOCKED_CTXSW
include/asm-mips/system.h:216:#define __ARCH_WANT_UNLOCKED_CTXSW
include/asm-ia64/system.h:259:#define __ARCH_WANT_UNLOCKED_CTXSW


(2) now, in this case (and for SMP)

task_running() effectively becomes { return p->oncpu; }


(3) consider a case of the context switch between prev --> next on CPU #0

'next' has preempted 'prev'


(4) context_switch() :

next->oncpu becomes '1' as the result of:

[1] context_switch() --> prepare_task_switch() --> prepare_lock_switch(next) -->
next->oncpu = 1

prev->oncpu becomes '0' as the result of:

[2] context_switch() --> finish_task_switch() -->
finish_lock_switch(prev) --> prev->oncpu = 0


[1] takes place at the very _beginning_ of context_switch() _and_ one
more thing is that rq->lock gets unlocked.

[2] takes place at the very _end_ of context_switch()

Now recall what's task_running() in our case  ( it's "return task->oncpu" )

As a result, between [1] and [2] we have 2 tasks on a single CPU for
which task_running() will return '1' and their runqueue is _unlocked_.


(5) now consider sched_move_task() running on another CPU #1.

due to 'UNLOCKED_CTXSW' it can successfully lock the rq of CPU #0

let's say it's called for 'prev' task (the one being scheduled out on
CPU #0 at this very moment)

as we remember, task_running() returns '1' for it (CPU #0 hasn't
yet reached point [2] as described in (4) above)

'prev' is currently on the runqueue (prev->se.on_rq == 1) and within the tree.

what happens is as follows:

- dequeue_task() removes it from the tree ;
- put_prev_task() makes cfs_rq->curr = NULL ;

se == prev.se here... so e.g. __enqueue_entity() is not called for 'prev'

- set_curr_task() --> set_curr_task_fair()

and here things become interesting.

static void set_curr_task_fair(struct rq *rq)
{
        struct sched_entity *se = &rq->curr->se;

        for_each_sched_entity(se)
                set_next_entity(cfs_rq_of(se), se);
}

so 'se' actually belongs to the 'next' on CPU #0

next->on_rq == 1 (obviously, as dequeue_task() in sched_move_task()
was done for 'prev' !)

and now, set_next_entity() does __dequeue_entity() for 'next' which is
_not_ within the tree !!!
(it's the real 'current' on CPU #0)

that's why the reported oops:

>  [<a0000001002e0480>] rb_erase+0x300/0x7e0
>  [<a000000100076290>] __dequeue_entity+0x70/0xa0
>  [<a000000100076300>] set_next_entity+0x40/0xa0
>  [<a0000001000763a0>] set_curr_task_fair+0x40/0xa0
>  [<a000000100078d90>] sched_move_task+0x2d0/0x340
>  [<a000000100078e20>] cpu_cgroup_attach+0x20/0x40

or maybe there is also a possibility of the rb-tree being corrupted as
a result and having a crash somewhere later (the original report had
another backtrace)

hum... does this analysis make sense to somebody else now?


-- 
Best regards,
Dmitry Adamushko

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                                 ` <b647ffbd0712151544n2dfad101r2d306d393e8550ff-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2007-12-16  0:00                                                   ` Dmitry Adamushko
       [not found]                                                     ` <b647ffbd0712151600s14e3f355we5ee6348b4d484cc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Dmitry Adamushko @ 2007-12-16  0:00 UTC (permalink / raw)
  To: Dhaval Giani
  Cc: Peter Zijlstra, Srivatsa Vaddagiri, Steven Rostedt,
	containers-qjLDD68F18O7TbgM5vRIOg, Andrew Morton, Ingo Molnar

[-- Attachment #1: Type: text/plain, Size: 479 bytes --]

Dhaval,

so following the analysis in the previous mail... here is a test
patch. Could you please give it a try?

TIA,

(a version without broken white-space is enclosed)

---
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7360,7 +7360,7 @@ void sched_move_task(struct task_struct *tsk)

        update_rq_clock(rq);

-       running = task_running(rq, tsk);
+       running = (rq->curr == tsk);
        on_rq = tsk->se.on_rq;

        if (on_rq) {
---

-- 
Best regards,
Dmitry Adamushko

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 01-set_task_cfs_rq.patch --]
[-- Type: text/x-patch; name=01-set_task_cfs_rq.patch, Size: 434 bytes --]

diff --git a/include/linux/sched.h b/include/linux/sched.h
diff --git a/kernel/sched.c b/kernel/sched.c
index dc6fb24..12ff60f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7360,7 +7360,7 @@ void sched_move_task(struct task_struct *tsk)
 
 	update_rq_clock(rq);
 
-	running = task_running(rq, tsk);
+	running = (rq->curr == tsk);
 	on_rq = tsk->se.on_rq;
 
 	if (on_rq) {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

[-- Attachment #3: Type: text/plain, Size: 206 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                                     ` <b647ffbd0712151600s14e3f355we5ee6348b4d484cc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2007-12-16  4:28                                                       ` Dhaval Giani
       [not found]                                                         ` <20071216042821.GA8494-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2007-12-16  8:55                                                       ` Ingo Molnar
  1 sibling, 1 reply; 25+ messages in thread
From: Dhaval Giani @ 2007-12-16  4:28 UTC (permalink / raw)
  To: Dmitry Adamushko
  Cc: Peter Zijlstra, Srivatsa Vaddagiri, Steven Rostedt,
	containers-qjLDD68F18O7TbgM5vRIOg, Andrew Morton, Ingo Molnar

On Sun, Dec 16, 2007 at 01:00:07AM +0100, Dmitry Adamushko wrote:
> Dhaval,
> 
> so following the analysis in the previous mail... here is a test
> patch. Could you please give it a try?
> 

Yep, it works!

Tested-by: Dhaval Giani <dhaval-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

thanks,
-- 
regards,
Dhaval

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                                     ` <b647ffbd0712151600s14e3f355we5ee6348b4d484cc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2007-12-16  4:28                                                       ` Dhaval Giani
@ 2007-12-16  8:55                                                       ` Ingo Molnar
       [not found]                                                         ` <20071216085559.GB20790-X9Un+BFzKDI@public.gmane.org>
  1 sibling, 1 reply; 25+ messages in thread
From: Ingo Molnar @ 2007-12-16  8:55 UTC (permalink / raw)
  To: Dmitry Adamushko
  Cc: Peter Zijlstra, Srivatsa Vaddagiri, Steven Rostedt,
	containers-qjLDD68F18O7TbgM5vRIOg, Andrew Morton, Dhaval Giani


* Dmitry Adamushko <dmitry.adamushko-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:

> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -7360,7 +7360,7 @@ void sched_move_task(struct task_struct *tsk)
> 
>         update_rq_clock(rq);
> 
> -       running = task_running(rq, tsk);
> +       running = (rq->curr == tsk);
>         on_rq = tsk->se.on_rq;

thanks, i've queued this up (pending more testing).

Btw., you should be able to force the ia64 scheduling by adding this to 
the very top of include/linux/sched.h:

#define __ARCH_WANT_UNLOCKED_CTXSW
#define __ARCH_WANT_INTERRUPTS_ON_CTXSW

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                                         ` <20071216085559.GB20790-X9Un+BFzKDI@public.gmane.org>
@ 2007-12-16 10:06                                                           ` Dmitry Adamushko
  0 siblings, 0 replies; 25+ messages in thread
From: Dmitry Adamushko @ 2007-12-16 10:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Srivatsa Vaddagiri, Steven Rostedt,
	containers-qjLDD68F18O7TbgM5vRIOg, Andrew Morton, Dhaval Giani

On 16/12/2007, Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org> wrote:
>
> * Dmitry Adamushko <dmitry.adamushko-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>
> > --- a/kernel/sched.c
> > +++ b/kernel/sched.c
> > @@ -7360,7 +7360,7 @@ void sched_move_task(struct task_struct *tsk)
> >
> >         update_rq_clock(rq);
> >
> > -       running = task_running(rq, tsk);
> > +       running = (rq->curr == tsk);
> >         on_rq = tsk->se.on_rq;
>
> thanks, i've queued this up (pending more testing).

btw., sched_setscheduler() and rt_mutex_setprio() are also affected
(in general, anything that may call put_prev_task/set_curr_task()
relying on task_running()).

We'll see; maybe we can come up with something better than just replacing
task_running() with (rq->curr == tsk) there.


> Btw., you should be able to force the ia64 scheduling by adding this to
> the very top of include/linux/sched.h:
>
> #define __ARCH_WANT_UNLOCKED_CTXSW
> #define __ARCH_WANT_INTERRUPTS_ON_CTXSW

Yeah, with both we even get ARM behavior. Can be a good test indeed.


>
>         Ingo
>


-- 
Best regards,
Dmitry Adamushko

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                                         ` <20071216042821.GA8494-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2007-12-17  1:12                                                           ` KAMEZAWA Hiroyuki
       [not found]                                                             ` <20071217101245.76562518.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-17  1:12 UTC (permalink / raw)
  To: Dhaval Giani
  Cc: Peter Zijlstra, Srivatsa Vaddagiri, Steven Rostedt,
	Dmitry Adamushko, containers-qjLDD68F18O7TbgM5vRIOg,
	Andrew Morton, Ingo Molnar

On Sun, 16 Dec 2007 09:58:21 +0530
Dhaval Giani <dhaval-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org> wrote:

> On Sun, Dec 16, 2007 at 01:00:07AM +0100, Dmitry Adamushko wrote:
> > Dhaval,
> > 
> > so following the analysis in the previous mail... here is a test
> > patch. Could you please give it a try?
> > 
> 
> Yep, it works!
> 
> Tested-by: Dhaval Giani <dhaval-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> 
Works for me, too !!

Thanks,
-Kame

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Hang with fair cgroup scheduler (reproducer is attached.)
       [not found]                                                             ` <20071217101245.76562518.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-17 14:45                                                               ` Ingo Molnar
  0 siblings, 0 replies; 25+ messages in thread
From: Ingo Molnar @ 2007-12-17 14:45 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Dhaval Giani, Srivatsa Vaddagiri, Steven Rostedt,
	Dmitry Adamushko, containers-qjLDD68F18O7TbgM5vRIOg,
	Andrew Morton, Peter Zijlstra


* KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> wrote:

> > > so following the analysis in the previous mail... here is a test
> > > patch. Could you please give it a try?
> > > 
> > 
> > Yep, it works!
> > 
> > Tested-by: Dhaval Giani <dhaval-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> > 
> Works for me, too !!

thanks guys, i'll push Dmitry's fix out with the next scheduler git 
push.

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2007-12-17 14:45 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-12-14  7:18 Hang with fair cgroup scheduler (reproducer is attached.) KAMEZAWA Hiroyuki
     [not found] ` <20071214161834.034e6efe.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-14  8:17   ` KAMEZAWA Hiroyuki
     [not found]     ` <20071214171759.59f7ba57.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-14  9:49       ` Ingo Molnar
     [not found]         ` <20071214094909.GG11266-X9Un+BFzKDI@public.gmane.org>
2007-12-14 10:58           ` KAMEZAWA Hiroyuki
     [not found]             ` <20071214195837.0d3511db.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-14 11:48               ` Dhaval Giani
2007-12-14 12:47               ` Dmitry Adamushko
     [not found]                 ` <20071214141528.GA6161-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-12-14 14:24                   ` kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
     [not found]                     ` <20442799.1197642268756.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-14 15:36                       ` Dhaval Giani
     [not found]                         ` <20071214153607.GB23670-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-12-14 15:38                           ` Dhaval Giani
     [not found]                             ` <20071214153823.GC23670-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-12-14 16:25                               ` Dmitry Adamushko
     [not found]                                 ` <b647ffbd0712140825h4f541be0xa7a7866e70b3af7a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-12-14 19:51                                   ` Dmitry Adamushko
     [not found]                                     ` <b647ffbd0712141151k697d9bbemda9a7e90515e4400-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-12-14 21:33                                       ` Steven Rostedt
     [not found]                                         ` <Pine.LNX.4.58.0712141614340.22005-f9ZlEuEWxVcI6MkJdU+c8EEOCMrvLtNR@public.gmane.org>
2007-12-15 10:22                                           ` Dmitry Adamushko
     [not found]                                             ` <b647ffbd0712150222p30cac9f9i772c2a2c4e05a4a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-12-15 10:50                                               ` Dhaval Giani
     [not found]                                                 ` <20071215105036.GB26325-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-12-15 11:15                                                   ` Dmitry Adamushko
2007-12-15 23:44                                               ` Dmitry Adamushko
     [not found]                                                 ` <b647ffbd0712151544n2dfad101r2d306d393e8550ff-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-12-16  0:00                                                   ` Dmitry Adamushko
     [not found]                                                     ` <b647ffbd0712151600s14e3f355we5ee6348b4d484cc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-12-16  4:28                                                       ` Dhaval Giani
     [not found]                                                         ` <20071216042821.GA8494-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-12-17  1:12                                                           ` KAMEZAWA Hiroyuki
     [not found]                                                             ` <20071217101245.76562518.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-17 14:45                                                               ` Ingo Molnar
2007-12-16  8:55                                                       ` Ingo Molnar
     [not found]                                                         ` <20071216085559.GB20790-X9Un+BFzKDI@public.gmane.org>
2007-12-16 10:06                                                           ` Dmitry Adamushko
     [not found]             ` <b647ffbd0712140447kfba5945ybde40f18653dd164-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-12-14 12:50               ` kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
2007-12-14 14:15               ` Dhaval Giani
2007-12-14  9:48   ` Ingo Molnar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.