From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm-devel <kvm@vger.kernel.org>, stable@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
Andy Lutomirski <luto@amacapital.net>
Subject: x86: kvm: Revert "remove sched notifier for cross-cpu migrations"
Date: Mon, 23 Mar 2015 20:21:51 -0300 [thread overview]
Message-ID: <20150323232151.GA12772@amt.cnet> (raw)
The following point:
2. per-CPU pvclock time info is updated if the
underlying CPU changes.
Is not true anymore since "KVM: x86: update pvclock area conditionally,
on cpu migration".
Add task migration notification back.
Problem noticed by Andy Lutomirski.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
CC: stable@kernel.org # 3.11+
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e..25b1cc0 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
struct pvclock_vsyscall_time_info {
struct pvclock_vcpu_time_info pvti;
+ u32 migrate_count;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2f355d2..e5ecd20 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+ if (!pvclock_vdso_info) {
+ BUG();
+ return NULL;
+ }
+
+ return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+ return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
#ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+ void *v)
+{
+ struct task_migration_notifier *mn = v;
+ struct pvclock_vsyscall_time_info *pvti;
+
+ pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+ /* this is NULL when pvclock vsyscall is not initialized */
+ if (unlikely(pvti == NULL))
+ return NOTIFY_DONE;
+
+ pvti->migrate_count++;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+ .notifier_call = pvclock_task_migrate,
+};
+
/*
* Initialize the generic pvclock vsyscall state. This will allocate
* a/some page(s) for the per-vcpu pvclock information, set up a
@@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
+ pvclock_vdso_info = i;
+
for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
__pa(i) + (idx*PAGE_SIZE),
PAGE_KERNEL_VVAR);
}
+
+ register_task_migration_notifier(&pvclock_migrate);
+
return 0;
}
#endif
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 9793322..3093376 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode)
cycle_t ret;
u64 last;
u32 version;
+ u32 migrate_count;
u8 flags;
unsigned cpu, cpu1;
/*
- * Note: hypervisor must guarantee that:
- * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
- * 2. that per-CPU pvclock time info is updated if the
- * underlying CPU changes.
- * 3. that version is increased whenever underlying CPU
- * changes.
- *
+ * When looping to get a consistent (time-info, tsc) pair, we
+ * also need to deal with the possibility we can switch vcpus,
+ * so make sure we always re-fetch time-info for the current vcpu.
*/
do {
cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -104,6 +101,8 @@ static notrace cycle_t vread_pvclock(int *mode)
pvti = get_pvti(cpu);
+ migrate_count = pvti->migrate_count;
+
version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
/*
@@ -115,7 +114,8 @@ static notrace cycle_t vread_pvclock(int *mode)
cpu1 = __getcpu() & VGETCPU_CPU_MASK;
} while (unlikely(cpu != cpu1 ||
(pvti->pvti.version & 1) ||
- pvti->pvti.version != version));
+ pvti->pvti.version != version ||
+ pvti->migrate_count != migrate_count));
if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d77432..be98910 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -176,6 +176,14 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
extern void calc_global_load(unsigned long ticks);
extern void update_cpu_load_nohz(void);
+/* Notifier for when a task gets migrated to a new CPU */
+struct task_migration_notifier {
+ struct task_struct *task;
+ int from_cpu;
+ int to_cpu;
+};
+extern void register_task_migration_notifier(struct notifier_block *n);
+
extern unsigned long get_parent_ip(unsigned long addr);
extern void dump_cpu_task(int cpu);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f0f831e..d0c4209 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -996,6 +996,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
rq_clock_skip_update(rq, true);
}
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
#ifdef CONFIG_SMP
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
@@ -1026,10 +1033,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
trace_sched_migrate_task(p, new_cpu);
if (task_cpu(p) != new_cpu) {
+ struct task_migration_notifier tmn;
+
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
+
+ tmn.task = p;
+ tmn.from_cpu = task_cpu(p);
+ tmn.to_cpu = new_cpu;
+
+ atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
}
__set_task_cpu(p, new_cpu);
next reply other threads:[~2015-03-23 23:22 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-03-23 23:21 Marcelo Tosatti [this message]
2015-03-23 23:30 ` x86: kvm: Revert "remove sched notifier for cross-cpu migrations" Andy Lutomirski
2015-03-24 15:34 ` Radim Krčmář
2015-03-24 22:33 ` Andy Lutomirski
2015-03-25 11:08 ` Radim Krčmář
2015-03-25 12:52 ` Radim Krčmář
2015-03-25 21:28 ` Marcelo Tosatti
2015-03-25 22:33 ` Andy Lutomirski
2015-03-25 22:41 ` Marcelo Tosatti
2015-03-25 22:48 ` Andy Lutomirski
2015-03-25 23:13 ` Marcelo Tosatti
2015-03-25 23:22 ` Andy Lutomirski
2015-03-26 11:29 ` Marcelo Tosatti
2015-03-26 18:51 ` Andy Lutomirski
2015-03-26 20:31 ` Radim Krcmar
2015-03-26 20:58 ` Andy Lutomirski
2015-03-26 22:22 ` Andy Lutomirski
2015-03-26 22:56 ` Marcelo Tosatti
2015-03-26 23:09 ` Andy Lutomirski
2015-03-26 23:22 ` Marcelo Tosatti
2015-03-26 23:28 ` Andy Lutomirski
2015-03-26 23:38 ` Marcelo Tosatti
2015-03-26 18:47 ` Andy Lutomirski
2015-03-26 20:10 ` Radim Krčmář
2015-03-26 20:52 ` Paolo Bonzini
2015-03-24 22:59 ` Marcelo Tosatti
2015-03-25 11:09 ` Radim Krčmář
2015-03-25 13:06 ` Radim Krčmář
2015-03-26 20:59 ` Radim Krčmář
2015-03-26 22:22 ` Marcelo Tosatti
2015-03-26 22:24 ` Andy Lutomirski
2015-03-26 22:40 ` Marcelo Tosatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150323232151.GA12772@amt.cnet \
--to=mtosatti@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=luto@amacapital.net \
--cc=pbonzini@redhat.com \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.