From: Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com>
To: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
yrl.pp-manager.tt@hitachi.com,
Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com>,
Avi Kivity <avi@redhat.com>,
Marcelo Tosatti <mtosatti@redhat.com>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>
Subject: [RFC v2 PATCH 21/21] x86: request TLB flush to slave CPU using NMI
Date: Thu, 06 Sep 2012 20:29:09 +0900 [thread overview]
Message-ID: <20120906112909.13320.88939.stgit@kvmdev> (raw)
In-Reply-To: <20120906112718.13320.8231.stgit@kvmdev>
For slave CPUs, it is inappropriate to request a TLB flush using an IPI,
because the IPI may be sent to a KVM guest when the slave CPU is running
the guest with direct interrupt routing.
Instead, it registers a TLB flush request in a per-cpu bitmask and sends an NMI
to interrupt execution of the guest. Then, the NMI handler will check the
bitmask and handle the pending requests.
This implementation has scalability issues and is just a proof of concept.
Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
arch/x86/include/asm/tlbflush.h | 5 ++
arch/x86/kernel/smpboot.c | 3 +
arch/x86/kvm/x86.c | 5 ++
arch/x86/mm/tlb.c | 94 +++++++++++++++++++++++++++++++++++++++
4 files changed, 106 insertions(+), 1 deletions(-)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 74a4433..bcd637b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -168,6 +168,11 @@ static inline void reset_lazy_tlbstate(void)
this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
}
+#ifdef CONFIG_SLAVE_CPU
+DECLARE_PER_CPU(bool, slave_idle);
+void handle_slave_tlb_flush(unsigned int cpu);
+#endif /* SLAVE_CPU */
+
#endif /* SMP */
#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ba7c99b..9854087 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -395,7 +395,10 @@ notrace static void __cpuinit start_slave_cpu(void *unused)
rcu_note_context_switch(cpu);
if (!f.func) {
+ __this_cpu_write(slave_idle, 1);
+ handle_slave_tlb_flush(cpu);
native_safe_halt();
+ __this_cpu_write(slave_idle, 0);
continue;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d92581..d3ee570 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -65,6 +65,7 @@
#include <asm/cpu.h>
#include <asm/nmi.h>
#include <asm/mmu.h>
+#include <asm/tlbflush.h>
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
@@ -5529,6 +5530,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct task_struct *task)
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ handle_slave_tlb_flush(vcpu->cpu);
+
if (req_immediate_exit)
smp_send_reschedule(vcpu->cpu);
@@ -5631,6 +5634,8 @@ static void __vcpu_enter_guest_slave(void *_arg)
r = vcpu_enter_guest(vcpu, arg->task);
+ handle_slave_tlb_flush(cpu);
+
if (r <= 0)
break;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 613cd83..54f1c1b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -11,6 +11,7 @@
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
#include <asm/uv/uv.h>
#include <linux/debugfs.h>
@@ -35,6 +36,10 @@ struct flush_tlb_info {
struct mm_struct *flush_mm;
unsigned long flush_start;
unsigned long flush_end;
+#ifdef CONFIG_SLAVE_CPU
+ cpumask_var_t mask;
+ struct list_head list;
+#endif
};
/*
@@ -97,6 +102,7 @@ EXPORT_SYMBOL_GPL(leave_mm);
static void flush_tlb_func(void *info)
{
struct flush_tlb_info *f = info;
+ int cpu = smp_processor_id();
if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
return;
@@ -115,9 +121,94 @@ static void flush_tlb_func(void *info)
}
}
} else
- leave_mm(smp_processor_id());
+ leave_mm(cpu);
+
+#ifdef CONFIG_SLAVE_CPU
+ if (cpu_slave(cpu))
+ cpumask_test_and_clear_cpu(cpu, f->mask);
+#endif
+}
+
+#ifdef CONFIG_SLAVE_CPU
+static DEFINE_PER_CPU(atomic_t, nr_slave_tlbf);
+DEFINE_PER_CPU(bool, slave_idle);
+static LIST_HEAD(fti_list);
+static DEFINE_RWLOCK(fti_list_lock);
+
+static int slave_tlb_flush_nmi(unsigned int val, struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ if (!cpu_slave(cpu) || !atomic_read(&__get_cpu_var(nr_slave_tlbf)))
+ return NMI_DONE;
+ if (this_cpu_read(slave_idle))
+ handle_slave_tlb_flush(cpu);
+ return NMI_HANDLED;
+}
+
+static int __cpuinit register_slave_tlb_flush_nmi(void)
+{
+ register_nmi_handler(NMI_LOCAL, slave_tlb_flush_nmi,
+ NMI_FLAG_FIRST, "slave_tlb_flush");
+ return 0;
+}
+late_initcall(register_slave_tlb_flush_nmi);
+
+void handle_slave_tlb_flush(unsigned int cpu)
+{
+ struct flush_tlb_info *info;
+ if (!cpu_slave(cpu) ||
+ !atomic_read(&__get_cpu_var(nr_slave_tlbf)))
+ return;
+
+ read_lock(&fti_list_lock);
+ list_for_each_entry(info, &fti_list, list) {
+ if (cpumask_test_cpu(cpu, info->mask)) {
+ flush_tlb_func(info);
+ atomic_dec(&__get_cpu_var(nr_slave_tlbf));
+ }
+ }
+ read_unlock(&fti_list_lock);
+}
+EXPORT_SYMBOL_GPL(handle_slave_tlb_flush);
+
+static void request_slave_tlb_flush(const struct cpumask *mask,
+ struct flush_tlb_info *info)
+{
+ int cpu;
+
+ if (!cpumask_intersects(mask, cpu_slave_mask))
+ return;
+
+ if (!alloc_cpumask_var(&info->mask, GFP_ATOMIC)) {
+ pr_err("%s: not enough memory\n", __func__);
+ return;
+ }
+
+ cpumask_and(info->mask, mask, cpu_slave_mask);
+ INIT_LIST_HEAD(&info->list);
+ write_lock(&fti_list_lock);
+ list_add(&info->list, &fti_list);
+ write_unlock(&fti_list_lock);
+
+ for_each_cpu_and(cpu, mask, cpu_slave_mask)
+ atomic_inc(&per_cpu(nr_slave_tlbf, cpu));
+
+ apic->send_IPI_mask(info->mask, NMI_VECTOR);
+ while (!cpumask_empty(info->mask))
+ cpu_relax();
+ write_lock(&fti_list_lock);
+ list_del(&info->list);
+ write_unlock(&fti_list_lock);
+ free_cpumask_var(info->mask);
+}
+#else
+static inline void request_slave_tlb_flush(const struct cpumask *mask,
+ struct flush_tlb_info *info)
+{
}
+#endif
void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long start,
@@ -139,6 +230,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
return;
}
smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
+ request_slave_tlb_flush(cpumask, &info);
}
void flush_tlb_current_task(void)
next prev parent reply other threads:[~2012-09-06 11:34 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-09-06 11:27 [RFC v2 PATCH 00/21] KVM: x86: CPU isolation and direct interrupts delivery to guests Tomoki Sekiyama
2012-09-06 11:27 ` [RFC v2 PATCH 01/21] x86: Split memory hotplug function from cpu_up() as cpu_memory_up() Tomoki Sekiyama
2012-09-06 11:31 ` Avi Kivity
2012-09-06 11:32 ` Avi Kivity
2012-09-06 11:27 ` [RFC v2 PATCH 02/21] x86: Add a facility to use offlined CPUs as slave CPUs Tomoki Sekiyama
2012-09-06 11:27 ` [RFC v2 PATCH 03/21] x86: Support hrtimer on " Tomoki Sekiyama
2012-09-06 11:27 ` [RFC v2 PATCH 04/21] x86: Avoid RCU warnings " Tomoki Sekiyama
2012-09-20 17:34 ` Paul E. McKenney
2012-09-28 8:10 ` Tomoki Sekiyama
2012-09-06 11:27 ` [RFC v2 PATCH 05/21] KVM: Enable/Disable virtualization on slave CPUs are activated/dying Tomoki Sekiyama
2012-09-06 11:27 ` [RFC v2 PATCH 06/21] KVM: Add facility to run guests on slave CPUs Tomoki Sekiyama
2012-09-06 11:27 ` [RFC v2 PATCH 07/21] KVM: handle page faults of slave guests on online CPUs Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 08/21] KVM: Add KVM_GET_SLAVE_CPU and KVM_SET_SLAVE_CPU to vCPU ioctl Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 09/21] KVM: Go back to online CPU on VM exit by external interrupt Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 10/21] KVM: proxy slab operations for slave CPUs on online CPUs Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 11/21] KVM: no exiting from guest when slave CPU halted Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 12/21] x86/apic: Enable external interrupt routing to slave CPUs Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 13/21] x86/apic: IRQ vector remapping on slave for " Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 14/21] KVM: Directly handle interrupts by guests without VM EXIT on " Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 15/21] KVM: add tracepoint on enabling/disabling direct interrupt delivery Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 16/21] KVM: vmx: Add definitions PIN_BASED_PREEMPTION_TIMER Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 17/21] KVM: add kvm_arch_vcpu_prevent_run to prevent VM ENTER when NMI is received Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 18/21] KVM: route assigned devices' MSI/MSI-X directly to guests on slave CPUs Tomoki Sekiyama
2012-09-06 11:28 ` [RFC v2 PATCH 19/21] KVM: Enable direct EOI for directly routed interrupts to guests Tomoki Sekiyama
2012-09-06 11:29 ` [RFC v2 PATCH 20/21] KVM: Pass-through local APIC timer of on slave CPUs to guest VM Tomoki Sekiyama
2012-09-06 11:29 ` Tomoki Sekiyama [this message]
2012-09-06 11:46 ` [RFC v2 PATCH 00/21] KVM: x86: CPU isolation and direct interrupts delivery to guests Avi Kivity
2012-09-07 8:26 ` Jan Kiszka
2012-09-10 11:36 ` Tomoki Sekiyama
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120906112909.13320.88939.stgit@kvmdev \
--to=tomoki.sekiyama.qu@hitachi.com \
--cc=avi@redhat.com \
--cc=hpa@zytor.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=mtosatti@redhat.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
--cc=yrl.pp-manager.tt@hitachi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox