diff for duplicates of <53DF6EFC.30705@arm.com> diff --git a/a/1.txt b/N1/1.txt index 7668c13..108ca73 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,33 +1,31 @@ On 04/08/14 04:20, Michael Ellerman wrote: > On Fri, 2014-08-01 at 14:24 -0700, Sukadev Bhattiprolu wrote: >> Dietmar Eggemann [dietmar.eggemann@arm.com] wrote: ->> | > ltcbrazos2-lp07 login: [ 181.915974] ------------[ cut here ]------= ------- +>> | > ltcbrazos2-lp07 login: [ 181.915974] ------------[ cut here ]------------ >> | > [ 181.915991] WARNING: at ../kernel/sched/core.c:5881 ->> |=20 ->> | This warning indicates the problem. One of the struct sched_domains do= -es +>> | +>> | This warning indicates the problem. One of the struct sched_domains does >> | not have it's groups member set. ->> |=20 +>> | >> | And its happening during a rebuild of the sched domain hierarchy, not >> | during the initial build. ->> |=20 +>> | >> | You could run your system with the following patch-let (on top of >> | https://lkml.org/lkml/2014/7/17/288) w/ and w/o the perf related >> | patches (w/ CONFIG_SCHED_DEBUG enabled). ->> |=20 +>> | >> | @@ -5882,6 +5882,9 @@ static void init_sched_groups_capacity(int cpu, >> | struct sched_domain *sd) >> | { ->> | struct sched_group *sg =3D sd->groups; ->> |=20 +>> | struct sched_group *sg = sd->groups; +>> | >> | +#ifdef CONFIG_SCHED_DEBUG >> | + printk("sd name: %s span: %pc\n", sd->name, sd->span); >> | +#endif >> | WARN_ON(!sg); ->> |=20 +>> | >> | do { ->> |=20 +>> | >> | This will show if the rebuild of the sched domain hierarchy happens on >> | both systems and hopefully indicate for which sched_domain the >> | sd->groups is not set. 
@@ -36,10 +34,8 @@ es >> have the sd->groups set - snippet of the error (with your patch and >> Peter's patch) >> ->> [ 181.914494] build_sched_groups: got group c000000006da0000 with cpus:= -=20 ->> [ 181.914498] build_sched_groups: got group c0000000dd830000 with cpus:= -=20 +>> [ 181.914494] build_sched_groups: got group c000000006da0000 with cpus: +>> [ 181.914498] build_sched_groups: got group c0000000dd830000 with cpus: >> [ 181.915234] sd name: SMT span: 8-15 >> [ 181.915239] sd name: DIE span: 0-7 >> [ 181.915242] sd name: NUMA span: 0-15 @@ -48,29 +44,27 @@ es >> >> Patched code: >> ->> =095884 static void init_sched_groups_capacity(int cpu, struct sched_dom= -ain *sd) ->> =095885 { ->> =095886 struct sched_group *sg =3D sd->groups; ->> =095887=20 ->> =095888 #ifdef CONFIG_SCHED_DEBUG ->> =095889 printk("sd name: %s span: %pc\n", sd->name, sd->span); ->> =095890 #endif ->> =095891 WARN_ON(!sg); +>> 5884 static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) +>> 5885 { +>> 5886 struct sched_group *sg = sd->groups; +>> 5887 +>> 5888 #ifdef CONFIG_SCHED_DEBUG +>> 5889 printk("sd name: %s span: %pc\n", sd->name, sd->span); +>> 5890 #endif +>> 5891 WARN_ON(!sg); >> >> Complete log below. >> >> I was able to bisect it down to this patch in the 24x7 patchset >> ->> =09https://lkml.org/lkml/2014/5/27/804 +>> https://lkml.org/lkml/2014/5/27/804 >> >> I replaced the kfree(page) calls in the patch with >> kmem_cache_free(hv_page_cache, page). >> ->> The problem sems to disappear if the call to create_events_from_catalog(= -) +>> The problem sems to disappear if the call to create_events_from_catalog() >> in hv_24x7_init() is skipped. I am continuing to debug the 24x7 patch. ->=20 +> > Is that patch just clobbering memory it doesn't own and corrupting the > scheduler data structures? 
@@ -120,7 +114,7 @@ sched_domain_topology_level *tl, + cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); if (child) { - sd->level =3D child->level + 1; + sd->level = child->level + 1; Should give you something similar like: @@ -134,14 +128,13 @@ build_sched_domain: cpu: 1 level: MC cpu_map: 0-4 tl->mask: 0-1 build_sched_domain: cpu: 1 level: DIE cpu_map: 0-4 tl->mask: 0-4 ... ->=20 +> > cheers ->=20 ->=20 +> +> > -- -> To unsubscribe from this list: send the line "unsubscribe linux-kernel" i= -n +> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ->=20 +> diff --git a/a/content_digest b/N1/content_digest index d3d22d6..9935cfa 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -18,33 +18,31 @@ "On 04/08/14 04:20, Michael Ellerman wrote:\n" "> On Fri, 2014-08-01 at 14:24 -0700, Sukadev Bhattiprolu wrote:\n" ">> Dietmar Eggemann [dietmar.eggemann@arm.com] wrote:\n" - ">> | > ltcbrazos2-lp07 login: [ 181.915974] ------------[ cut here ]------=\n" - "------\n" + ">> | > ltcbrazos2-lp07 login: [ 181.915974] ------------[ cut here ]------------\n" ">> | > [ 181.915991] WARNING: at ../kernel/sched/core.c:5881\n" - ">> |=20\n" - ">> | This warning indicates the problem. One of the struct sched_domains do=\n" - "es\n" + ">> | \n" + ">> | This warning indicates the problem. 
One of the struct sched_domains does\n" ">> | not have it's groups member set.\n" - ">> |=20\n" + ">> | \n" ">> | And its happening during a rebuild of the sched domain hierarchy, not\n" ">> | during the initial build.\n" - ">> |=20\n" + ">> | \n" ">> | You could run your system with the following patch-let (on top of\n" ">> | https://lkml.org/lkml/2014/7/17/288) w/ and w/o the perf related\n" ">> | patches (w/ CONFIG_SCHED_DEBUG enabled).\n" - ">> |=20\n" + ">> | \n" ">> | @@ -5882,6 +5882,9 @@ static void init_sched_groups_capacity(int cpu,\n" ">> | struct sched_domain *sd)\n" ">> | {\n" - ">> | struct sched_group *sg =3D sd->groups;\n" - ">> |=20\n" + ">> | struct sched_group *sg = sd->groups;\n" + ">> | \n" ">> | +#ifdef CONFIG_SCHED_DEBUG\n" ">> | + printk(\"sd name: %s span: %pc\\n\", sd->name, sd->span);\n" ">> | +#endif\n" ">> | WARN_ON(!sg);\n" - ">> |=20\n" + ">> | \n" ">> | do {\n" - ">> |=20\n" + ">> | \n" ">> | This will show if the rebuild of the sched domain hierarchy happens on\n" ">> | both systems and hopefully indicate for which sched_domain the\n" ">> | sd->groups is not set.\n" @@ -53,10 +51,8 @@ ">> have the sd->groups set - snippet of the error (with your patch and\n" ">> Peter's patch)\n" ">>\n" - ">> [ 181.914494] build_sched_groups: got group c000000006da0000 with cpus:=\n" - "=20\n" - ">> [ 181.914498] build_sched_groups: got group c0000000dd830000 with cpus:=\n" - "=20\n" + ">> [ 181.914494] build_sched_groups: got group c000000006da0000 with cpus: \n" + ">> [ 181.914498] build_sched_groups: got group c0000000dd830000 with cpus: \n" ">> [ 181.915234] sd name: SMT span: 8-15\n" ">> [ 181.915239] sd name: DIE span: 0-7\n" ">> [ 181.915242] sd name: NUMA span: 0-15\n" @@ -65,29 +61,27 @@ ">>\n" ">> Patched code:\n" ">>\n" - ">> =095884 static void init_sched_groups_capacity(int cpu, struct sched_dom=\n" - "ain *sd)\n" - ">> =095885 {\n" - ">> =095886 struct sched_group *sg =3D sd->groups;\n" - ">> =095887=20\n" - ">> =095888 #ifdef 
CONFIG_SCHED_DEBUG\n" - ">> =095889 printk(\"sd name: %s span: %pc\\n\", sd->name, sd->span);\n" - ">> =095890 #endif\n" - ">> =095891 WARN_ON(!sg);\n" + ">> \t5884 static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)\n" + ">> \t5885 {\n" + ">> \t5886 struct sched_group *sg = sd->groups;\n" + ">> \t5887 \n" + ">> \t5888 #ifdef CONFIG_SCHED_DEBUG\n" + ">> \t5889 printk(\"sd name: %s span: %pc\\n\", sd->name, sd->span);\n" + ">> \t5890 #endif\n" + ">> \t5891 WARN_ON(!sg);\n" ">>\n" ">> Complete log below.\n" ">>\n" ">> I was able to bisect it down to this patch in the 24x7 patchset\n" ">>\n" - ">> =09https://lkml.org/lkml/2014/5/27/804\n" + ">> \thttps://lkml.org/lkml/2014/5/27/804\n" ">>\n" ">> I replaced the kfree(page) calls in the patch with\n" ">> kmem_cache_free(hv_page_cache, page).\n" ">>\n" - ">> The problem sems to disappear if the call to create_events_from_catalog(=\n" - ")\n" + ">> The problem sems to disappear if the call to create_events_from_catalog()\n" ">> in hv_24x7_init() is skipped. 
I am continuing to debug the 24x7 patch.\n" - ">=20\n" + "> \n" "> Is that patch just clobbering memory it doesn't own and corrupting the\n" "> scheduler data structures?\n" "\n" @@ -137,7 +131,7 @@ "+\n" " cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));\n" " if (child) {\n" - " sd->level =3D child->level + 1;\n" + " sd->level = child->level + 1;\n" "\n" "\n" "Should give you something similar like:\n" @@ -151,16 +145,15 @@ "build_sched_domain: cpu: 1 level: DIE cpu_map: 0-4 tl->mask: 0-4\n" "...\n" "\n" - ">=20\n" + "> \n" "> cheers\n" - ">=20\n" - ">=20\n" + "> \n" + "> \n" "> --\n" - "> To unsubscribe from this list: send the line \"unsubscribe linux-kernel\" i=\n" - "n\n" + "> To unsubscribe from this list: send the line \"unsubscribe linux-kernel\" in\n" "> the body of a message to majordomo@vger.kernel.org\n" "> More majordomo info at http://vger.kernel.org/majordomo-info.html\n" "> Please read the FAQ at http://www.tux.org/lkml/\n" - >=20 + > -14d5f07081149a4bc5ff007076c2cc2bbb704ecd5523ac41ae1d201383f9ccde +66c9f3bc1092bcd37dade7ff56066b46f3e0a9e4e4782791703cae24b47b91e2
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.