From: MaoXiaoyun
Subject: RE: Kernel BUG at arch/x86/mm/tlb.c:61
Date: Mon, 25 Apr 2011 12:42:48 +0800
To: jeremy@goop.org
Cc: xen devel, giamteckchoon@gmail.com, konrad.wilk@oracle.com
In-Reply-To: <4DA8B715.9080508@goop.org>

I went through switch_mm some more and came up with one more question:

Why don't we need to clear this cpu's bit in prev's cpumask between lines 59 and 60?

Say:
1) Context switches from process A to a kernel thread, so the kernel thread has active_mm -> A's mm.
2) Context switches from the kernel thread back to A; in sched.c, oldmm = A's mm and mm = A's mm.
3) We then reach arch/x86/include/asm/mmu_context.h:60, since prev == next.
   If another CPU flushes A's mm while this cpu has not cleared its bit in the cpumask, this cpu
   might enter the flush IPI interrupt routine and also find cpu_tlbstate.state set to TLBSTATE_OK.

Is this possible? (A small userspace sketch of the scenario follows the two excerpts below.)

kernel/sched.c

2999 context_switch(struct rq *rq, struct task_struct *prev,
3000                struct task_struct *next)
3001 {
3002     struct mm_struct *mm, *oldmm;
3003
3004     prepare_task_switch(rq, prev, next);
3005     trace_sched_switch(rq, prev, next);
3006     mm = next->mm;
3007     oldmm = prev->active_mm;
3008     /*
3009      * For paravirt, this is coupled with an exit in switch_to to
3010      * combine the page table reload and the switch backend into
3011      * one hypercall.
3012      */
3013     arch_start_context_switch(prev);
3014
3015     if (unlikely(!mm)) {
3016         next->active_mm = oldmm;
3017         atomic_inc(&oldmm->mm_count);
3018         enter_lazy_tlb(oldmm, next);
3019     } else
3020         switch_mm(oldmm, mm, next);
3021
3022     if (unlikely(!prev->mm)) {
3023         prev->active_mm = NULL;
3024         rq->prev_mm = oldmm;
3025     }

arch/x86/include/asm/mmu_context.h

33 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
34                              struct task_struct *tsk)
35 {
36     unsigned cpu = smp_processor_id();
37
38     if (likely(prev != next)) {
39         /* stop flush ipis for the previous mm */
40         cpumask_clear_cpu(cpu, mm_cpumask(prev));
41
42
43 #ifdef CONFIG_SMP
44         percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
45         percpu_write(cpu_tlbstate.active_mm, next);
46 #endif
47         cpumask_set_cpu(cpu, mm_cpumask(next));
48
49         /* Re-load page tables */
50         load_cr3(next->pgd);
51
52         /*
53          * load the LDT, if the LDT is different:
54          */
55         if (unlikely(prev->context.ldt != next->context.ldt))
56             load_LDT_nolock(&next->context);
57     }
58 #ifdef CONFIG_SMP
59     else {
60         percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
61         BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
62
63         if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
64             /* We were in lazy tlb mode and leave_mm disabled
65              * tlb flush IPI delivery. We must reload CR3
66              * to make sure to use no freed page tables.
67              */
68             load_cr3(next->pgd);
69             load_LDT_nolock(&next->context);
70         }
71     }
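To make the scenario concrete, here is a minimal userspace sketch of the state transitions I have in mind. It is not kernel code: the bitmask, the per-cpu arrays and the simplified flush-IPI handler are stand-ins of my own for mm_cpumask(), cpu_tlbstate and leave_mm(), and it simply replays steps 1)-3) on one cpu while another cpu frees A's page tables.

/*
 * Userspace model only -- NOT kernel code.  The names below mimic the
 * kernel structures from the excerpt, but the flush-IPI handler is an
 * assumption of mine: it calls "leave_mm" when the target CPU is lazy.
 */
#include <stdio.h>

#define TLBSTATE_OK   1
#define TLBSTATE_LAZY 2
#define NR_CPUS       2

struct mm {
    unsigned long cpumask;   /* stands in for mm_cpumask(mm) */
    int pgd_gen;             /* bumped when page tables are freed/rebuilt */
};

static int tlbstate[NR_CPUS];          /* cpu_tlbstate.state */
static struct mm *active_mm[NR_CPUS];  /* cpu_tlbstate.active_mm */
static int cr3_gen[NR_CPUS];           /* which page tables each CPU has loaded */

static void load_cr3(int cpu, struct mm *mm)
{
    cr3_gen[cpu] = mm->pgd_gen;
    printf("cpu%d: load_cr3, pgd generation %d\n", cpu, mm->pgd_gen);
}

/* What the flush IPI roughly does on a CPU still set in mm's cpumask. */
static void flush_ipi(int cpu, struct mm *mm)
{
    if (tlbstate[cpu] == TLBSTATE_LAZY) {
        /* leave_mm(): clear our bit so no further IPIs are sent to us */
        mm->cpumask &= ~(1UL << cpu);
        printf("cpu%d: lazy, leave_mm clears cpumask bit\n", cpu);
    } else {
        /* TLBSTATE_OK: flush for real (modelled here as a CR3 reload) */
        load_cr3(cpu, mm);
    }
}

/* The prev == next branch of switch_mm() (lines 59-71 above). */
static void switch_mm_prev_eq_next(int cpu, struct mm *next)
{
    tlbstate[cpu] = TLBSTATE_OK;                     /* line 60 */
    if (active_mm[cpu] != next)                      /* line 61: BUG_ON */
        printf("cpu%d: BUG: active_mm mismatch\n", cpu);

    if (!(next->cpumask & (1UL << cpu))) {           /* line 63 */
        next->cpumask |= 1UL << cpu;
        load_cr3(cpu, next);                         /* bit was cleared by leave_mm */
    } else {
        printf("cpu%d: bit still set, no CR3 reload\n", cpu);
    }
}

int main(void)
{
    struct mm A = { .cpumask = 1UL << 0, .pgd_gen = 1 };

    /* 1) cpu0 runs process A, then switches to a kernel thread (lazy). */
    active_mm[0] = &A;
    tlbstate[0] = TLBSTATE_OK;
    load_cr3(0, &A);
    tlbstate[0] = TLBSTATE_LAZY;   /* enter_lazy_tlb(): cpumask bit stays set */

    /* cpu1 frees some of A's page tables and flushes; the IPI reaches cpu0. */
    A.pgd_gen = 2;
    flush_ipi(0, &A);

    /* 2)+3) cpu0 switches from the kernel thread back to A: prev == next. */
    switch_mm_prev_eq_next(0, &A);

    if (cr3_gen[0] != A.pgd_gen)
        printf("cpu0 would still be using freed page tables!\n");
    return 0;
}

In this sequential replay, cpumask_test_and_set_cpu() at line 63 notices that leave_mm() cleared the bit and reloads CR3, as the comment at lines 64-67 says. My question is about the window where the flush IPI arrives around lines 59-63, after cpu_tlbstate.state has already been set back to TLBSTATE_OK but while this cpu's bit is still set in the cpumask.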