From: Feng Wu <feng.wu@intel.com>
To: xen-devel@lists.xen.org
Cc: kevin.tian@intel.com, keir@xen.org, george.dunlap@eu.citrix.com,
andrew.cooper3@citrix.com, dario.faggioli@citrix.com,
jbeulich@suse.com, Feng Wu <feng.wu@intel.com>
Subject: [PATCH v2 4/4] VMX: fixup PI descriptor when cpu is offline
Date: Thu, 26 May 2016 21:39:14 +0800 [thread overview]
Message-ID: <1464269954-8056-5-git-send-email-feng.wu@intel.com> (raw)
In-Reply-To: <1464269954-8056-1-git-send-email-feng.wu@intel.com>
When a cpu goes offline, we need to move all the vcpus on its blocking
list to another online cpu; this patch handles that. We also need to
carefully handle the situation in which a call to vmx_vcpu_block() and
the cpu going offline occur concurrently.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
xen/arch/x86/hvm/vmx/vmcs.c | 1 +
xen/arch/x86/hvm/vmx/vmx.c | 90 +++++++++++++++++++++++++++++++++++++-
xen/common/schedule.c | 8 +---
xen/include/asm-x86/hvm/hvm.h | 4 +-
xen/include/asm-x86/hvm/vmx/vmcs.h | 2 +-
xen/include/asm-x86/hvm/vmx/vmx.h | 1 +
6 files changed, 96 insertions(+), 10 deletions(-)
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 8284281..aa25788 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -590,6 +590,7 @@ void vmx_cpu_dead(unsigned int cpu)
vmx_free_vmcs(per_cpu(vmxon_region, cpu));
per_cpu(vmxon_region, cpu) = 0;
nvmx_cpu_dead(cpu);
+ vmx_pi_desc_fixup(cpu);
}
int vmx_cpu_up(void)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 662b38d..b56082e 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -87,6 +87,7 @@ static int vmx_vmfunc_intercept(struct cpu_user_regs *regs);
struct vmx_pi_blocking_vcpu {
struct list_head list;
spinlock_t lock;
+ bool_t down;
};
/*
@@ -102,9 +103,10 @@ void vmx_pi_per_cpu_init(unsigned int cpu)
{
INIT_LIST_HEAD(&per_cpu(vmx_pi_blocking, cpu).list);
spin_lock_init(&per_cpu(vmx_pi_blocking, cpu).lock);
+ per_cpu(vmx_pi_blocking, cpu).down = 0;
}
-static void vmx_vcpu_block(struct vcpu *v)
+static bool_t vmx_vcpu_block(struct vcpu *v)
{
unsigned long flags;
unsigned int dest;
@@ -122,10 +124,25 @@ static void vmx_vcpu_block(struct vcpu *v)
* new vCPU to the list.
*/
spin_unlock_irqrestore(&v->arch.hvm_vmx.pi_hotplug_lock, flags);
- return;
+ return 1;
}
spin_lock(pi_blocking_list_lock);
+ if ( unlikely(per_cpu(vmx_pi_blocking, v->processor).down) )
+ {
+ /*
+ * Getting here means that v->processor is going away, and all
+ * the vcpus on its blocking list have been removed from it. Hence
+ * we cannot add a new vcpu to it. Besides that, we return 1 to
+ * prevent the vcpu from being blocked: the caller clears
+ * _VPF_blocked on a non-zero return. This is needed because
+ * if the vCPU continued to block while on no per-cpu blocking
+ * list, it might never be woken up by the wakeup notification
+ * event.
+ */
+ spin_unlock(pi_blocking_list_lock);
+ spin_unlock_irqrestore(&v->arch.hvm_vmx.pi_hotplug_lock, flags);
+ return 1;
+ }
old_lock = cmpxchg(&v->arch.hvm_vmx.pi_blocking.lock, NULL,
pi_blocking_list_lock);
@@ -161,6 +178,8 @@ static void vmx_vcpu_block(struct vcpu *v)
x2apic_enabled ? dest : MASK_INSR(dest, PI_xAPIC_NDST_MASK));
write_atomic(&pi_desc->nv, pi_wakeup_vector);
+
+ return 0;
}
static void vmx_pi_switch_from(struct vcpu *v)
@@ -253,6 +272,73 @@ static void vmx_pi_blocking_cleanup(struct vcpu *v)
spin_unlock_irqrestore(&v->arch.hvm_vmx.pi_hotplug_lock, flags);
}
+void vmx_pi_desc_fixup(int cpu)
+{
+ unsigned int new_cpu, dest;
+ unsigned long flags;
+ struct arch_vmx_struct *vmx, *tmp;
+ spinlock_t *new_lock, *old_lock = &per_cpu(vmx_pi_blocking, cpu).lock;
+ struct list_head *blocked_vcpus = &per_cpu(vmx_pi_blocking, cpu).list;
+
+ if ( !iommu_intpost )
+ return;
+
+ spin_lock_irqsave(old_lock, flags);
+ per_cpu(vmx_pi_blocking, cpu).down = 1;
+
+ list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
+ {
+ /*
+ * We need to find an online cpu as the NDST of the PI descriptor; it
+ * doesn't matter whether it is within the cpupool of the domain or
+ * not. As long as it is online, the vCPU will be woken up once the
+ * notification event arrives.
+ */
+ new_cpu = cpu;
+restart:
+ while ( 1 )
+ {
+ new_cpu = (new_cpu + 1) % nr_cpu_ids;
+ if ( cpu_online(new_cpu) )
+ break;
+ }
+ new_lock = &per_cpu(vmx_pi_blocking, new_cpu).lock;
+
+ spin_lock(new_lock);
+ /*
+ * After acquiring the blocking list lock for the new cpu, we need
+ * to check whether new_cpu is still online.
+ *
+ * If '.down' is true, it means 'new_cpu' is also going to be offline,
+ * so just go back to find another one, otherwise, there are two
+ * possibilities:
+ * case 1 - 'new_cpu' is online.
+ * case 2 - 'new_cpu' is about to be offline, but doesn't get to
+ * the point where '.down' is set.
+ * In either case above, we can just set 'new_cpu' to 'NDST' field.
+ * For case 2 the 'NDST' field will be set to another online cpu when
+ * we get to this function for 'new_cpu' some time later.
+ */
+ if ( per_cpu(vmx_pi_blocking, new_cpu).down )
+ {
+ spin_unlock(new_lock);
+ goto restart;
+ }
+
+ ASSERT(vmx->pi_blocking.lock == old_lock);
+
+ dest = cpu_physical_id(new_cpu);
+ write_atomic(&vmx->pi_desc.ndst,
+ x2apic_enabled ? dest : MASK_INSR(dest, PI_xAPIC_NDST_MASK));
+
+ list_move(&vmx->pi_blocking.list,
+ &per_cpu(vmx_pi_blocking, new_cpu).list);
+ vmx->pi_blocking.lock = new_lock;
+ spin_unlock(new_lock);
+ }
+ spin_unlock_irqrestore(old_lock, flags);
+}
+
/* This function is called when pcidevs_lock is held */
void vmx_pi_hooks_assign(struct domain *d)
{
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 5546999..b60491d 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -833,10 +833,8 @@ void vcpu_block(void)
set_bit(_VPF_blocked, &v->pause_flags);
- arch_vcpu_block(v);
-
/* Check for events /after/ blocking: avoids wakeup waiting race. */
- if ( local_events_need_delivery() )
+ if ( arch_vcpu_block(v) || local_events_need_delivery() )
{
clear_bit(_VPF_blocked, &v->pause_flags);
}
@@ -872,8 +870,6 @@ static long do_poll(struct sched_poll *sched_poll)
v->poll_evtchn = -1;
set_bit(v->vcpu_id, d->poll_mask);
- arch_vcpu_block(v);
-
#ifndef CONFIG_X86 /* set_bit() implies mb() on x86 */
/* Check for events /after/ setting flags: avoids wakeup waiting race. */
smp_mb();
@@ -891,7 +887,7 @@ static long do_poll(struct sched_poll *sched_poll)
#endif
rc = 0;
- if ( local_events_need_delivery() )
+ if ( arch_vcpu_block(v) || local_events_need_delivery() )
goto out;
for ( i = 0; i < sched_poll->nr_ports; i++ )
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 7b7ff3f..725dce7 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -608,11 +608,13 @@ unsigned long hvm_cr4_guest_reserved_bits(const struct vcpu *v, bool_t restore);
* not been defined yet.
*/
#define arch_vcpu_block(v) ({ \
+ bool_t rc = 0; \
struct vcpu *v_ = (v); \
struct domain *d_ = v_->domain; \
if ( has_hvm_container_domain(d_) && \
d_->arch.hvm_domain.vmx.vcpu_block ) \
- d_->arch.hvm_domain.vmx.vcpu_block(v_); \
+ rc = d_->arch.hvm_domain.vmx.vcpu_block(v_); \
+ rc; \
})
#endif /* __ASM_X86_HVM_HVM_H__ */
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 3834f49..e1834f7 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -132,7 +132,7 @@ struct vmx_domain {
* has a physical device assigned to it, so we set and clear the callbacks
* as appropriate when device assignment changes.
*/
- void (*vcpu_block) (struct vcpu *);
+ bool_t (*vcpu_block) (struct vcpu *);
void (*pi_switch_from) (struct vcpu *v);
void (*pi_switch_to) (struct vcpu *v);
void (*pi_do_resume) (struct vcpu *v);
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index a85d488..b6ba123 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -565,6 +565,7 @@ void free_p2m_hap_data(struct p2m_domain *p2m);
void p2m_init_hap_data(struct p2m_domain *p2m);
void vmx_pi_per_cpu_init(unsigned int cpu);
+void vmx_pi_desc_fixup(int cpu);
void vmx_pi_hooks_assign(struct domain *d);
void vmx_pi_hooks_deassign(struct domain *d);
--
2.1.0
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
next prev parent reply other threads:[~2016-05-26 13:39 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-05-26 13:39 [PATCH v2 0/4] VMX: Properly handle pi descriptor and per-cpu blocking list Feng Wu
2016-05-26 13:39 ` [PATCH v2 1/4] VMX: Properly handle pi when all the assigned devices are removed Feng Wu
2016-05-27 13:43 ` Jan Beulich
2016-05-31 10:22 ` Wu, Feng
2016-05-31 11:52 ` Jan Beulich
2016-06-03 5:12 ` Wu, Feng
2016-06-03 9:45 ` Jan Beulich
2016-05-26 13:39 ` [PATCH v2 2/4] VMX: Cleanup PI per-cpu blocking list when vcpu is destroyed Feng Wu
2016-05-27 13:49 ` Jan Beulich
2016-05-31 10:22 ` Wu, Feng
2016-05-31 11:54 ` Jan Beulich
2016-05-26 13:39 ` [PATCH v2 3/4] VMX: Assign the right value to 'NDST' field in a concern case Feng Wu
2016-05-27 14:00 ` Jan Beulich
2016-05-31 10:27 ` Wu, Feng
2016-05-31 11:58 ` Jan Beulich
2016-06-03 5:23 ` Wu, Feng
2016-06-03 9:57 ` Jan Beulich
2016-06-22 18:00 ` George Dunlap
2016-06-24 9:08 ` Wu, Feng
2016-05-26 13:39 ` Feng Wu [this message]
2016-05-27 14:56 ` [PATCH v2 4/4] VMX: fixup PI descriptor when cpu is offline Jan Beulich
2016-05-31 10:31 ` Wu, Feng
2016-06-22 18:33 ` George Dunlap
2016-06-24 6:34 ` Wu, Feng
2016-05-26 17:20 ` [PATCH v2 0/4] VMX: Properly handle pi descriptor and per-cpu blocking list Dario Faggioli
2016-05-31 10:19 ` Wu, Feng
2016-06-22 21:33 ` Dario Faggioli
2016-06-24 6:33 ` Wu, Feng
2016-06-24 10:29 ` Dario Faggioli
2016-06-24 13:42 ` Wu, Feng
2016-06-24 13:49 ` George Dunlap
2016-06-24 14:36 ` Dario Faggioli
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1464269954-8056-5-git-send-email-feng.wu@intel.com \
--to=feng.wu@intel.com \
--cc=andrew.cooper3@citrix.com \
--cc=dario.faggioli@citrix.com \
--cc=george.dunlap@eu.citrix.com \
--cc=jbeulich@suse.com \
--cc=keir@xen.org \
--cc=kevin.tian@intel.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).