From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: Re: [RFC v1 13/15] Update Posted-Interrupts Descriptor during vCPU scheduling Date: Thu, 26 Mar 2015 20:16:02 +0000 Message-ID: <55146902.4020504@citrix.com> References: <1427286717-4093-1-git-send-email-feng.wu@intel.com> <1427286717-4093-14-git-send-email-feng.wu@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1427286717-4093-14-git-send-email-feng.wu@intel.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Feng Wu , xen-devel@lists.xen.org Cc: yang.z.zhang@intel.com, kevin.tian@intel.com, keir@xen.org, JBeulich@suse.com List-Id: xen-devel@lists.xenproject.org On 25/03/15 12:31, Feng Wu wrote: > The basic idea here is: > 1. When vCPU's state is RUNSTATE_running, > - set 'NV' to 'Notification Vector'. > - Clear 'SN' to accpet PI. > - set 'NDST' to the right pCPU. > 2. When vCPU's state is RUNSTATE_blocked, > - set 'NV' to 'Wake-up Vector', so we can wake up the > related vCPU when posted-interrupt happens for it. > - Clear 'SN' to accpet PI. > 3. When vCPU's state is RUNSTATE_runnable/RUNSTATE_offline, > - Set 'SN' to suppress non-urgent interrupts. > (Current, we only support non-urgent interrupts) > - Set 'NV' back to 'Notification Vector' if needed. > > Signed-off-by: Feng Wu > --- > xen/arch/x86/hvm/vmx/vmx.c | 108 +++++++++++++++++++++++++++++++++++++++++++++ > xen/common/schedule.c | 3 ++ > 2 files changed, 111 insertions(+) > > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c > index b30392c..6323bd6 100644 > --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -1710,6 +1710,113 @@ static void vmx_handle_eoi(u8 vector) > __vmwrite(GUEST_INTR_STATUS, status); > } > > +static void vmx_pi_desc_update(struct vcpu *v, int new_state) > +{ > + struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc; > + struct pi_desc old, new; > + int old_state = v->runstate.state; > + unsigned long flags; > + > + if ( !iommu_intpost ) > + return; > + > + switch ( new_state ) > + { > + case RUNSTATE_runnable: > + case RUNSTATE_offline: > + /* > + * We don't need to send notification event to a non-running > + * vcpu, the interrupt information will be delivered to it before > + * VM-ENTRY when the vcpu is scheduled to run next time. > + */ > + pi_set_sn(pi_desc); > + > + /* > + * If the state is transferred from RUNSTATE_blocked, > + * we should set 'NV' feild back to posted_intr_vector, > + * so the Posted-Interrupts can be delivered to the vCPU > + * by VT-d HW after it is scheduled to run. > + */ > + if ( old_state == RUNSTATE_blocked ) > + { > + do > + { > + old.control = new.control = pi_desc->control; > + new.nv = posted_intr_vector; > + } > + while ( cmpxchg(&pi_desc->control, old.control, new.control) > + != old.control ); > + > + /* > + * Delete the vCPU from the related wakeup queue > + * if we are resuming from blocked state > + */ > + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, > + v->processor), flags); > + list_del(&v->blocked_vcpu_list); The scheduler is perfectly able to change v->processor behind your back, so your spinlocks are not protecting access to v->blocked_vcpu_list. ~Andrew > + spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, > + v->processor), flags); > + } > + break; > + > + case RUNSTATE_blocked: > + /* > + * The vCPU is blocked on the wait queue. > + * Store the blocked vCPU on the list of the > + * vcpu->wakeup_cpu, which is the destination > + * of the wake-up notification event. > + */ > + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, > + v->processor), flags); > + list_add_tail(&v->blocked_vcpu_list, > + &per_cpu(blocked_vcpu_on_cpu, v->processor)); > + spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, > + v->processor), flags); > + > + do > + { > + old.control = new.control = pi_desc->control; > + > + /* > + * We should not block the vCPU if > + * an interrupt is posted for it. > + */ > + > + if ( pi_test_on(&old) == 1 ) > + { > + tasklet_schedule(&v->vcpu_wakeup_tasklet); > + return; > + } > + > + pi_clear_sn(&new); > + new.nv = pi_wakeup_vector; > + } > + while ( cmpxchg(&pi_desc->control, old.control, new.control) > + != old.control ); > + break; > + > + case RUNSTATE_running: > + ASSERT( pi_test_sn(pi_desc) == 1 ); > + > + do > + { > + old.control = new.control = pi_desc->control; > + if ( x2apic_enabled ) > + new.ndst = cpu_physical_id(v->processor); > + else > + new.ndst = (cpu_physical_id(v->processor) << 8) & 0xFF00; > + > + pi_clear_sn(&new); > + } > + while ( cmpxchg(&pi_desc->control, old.control, new.control) > + != old.control ); > + break; > + > + default: > + break; > + } > +} > + > void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx, > uint32_t *eax, uint32_t *ebx, > uint32_t *ecx, uint32_t *edx) > @@ -1795,6 +1902,7 @@ static struct hvm_function_table __initdata vmx_function_table = { > .process_isr = vmx_process_isr, > .deliver_posted_intr = vmx_deliver_posted_intr, > .sync_pir_to_irr = vmx_sync_pir_to_irr, > + .pi_desc_update = vmx_pi_desc_update, > .handle_eoi = vmx_handle_eoi, > .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m, > .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf, > diff --git a/xen/common/schedule.c b/xen/common/schedule.c > index ef79847..acf3186 100644 > --- a/xen/common/schedule.c > +++ b/xen/common/schedule.c > @@ -157,6 +157,9 @@ static inline void vcpu_runstate_change( > v->runstate.state_entry_time = new_entry_time; > } > > + if ( is_hvm_vcpu(v) && hvm_funcs.pi_desc_update ) > + hvm_funcs.pi_desc_update(v, new_state); > + > v->runstate.state = new_state; > } >