* [PATCH v3 14/15] x86/paravirt: move the Xen-only pv_mmu_ops under the PARAVIRT_XXL umbrella
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
Most of the paravirt ops defined in pv_mmu_ops are for Xen PV guests
only. Define them only if CONFIG_PARAVIRT_XXL is set. The TLB flush
ops, tlb_remove_table and exit_mmap remain available with plain
CONFIG_PARAVIRT.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
arch/x86/include/asm/fixmap.h | 2 +-
arch/x86/include/asm/mmu_context.h | 4 +-
arch/x86/include/asm/paravirt.h | 125 +++++++++++++++++-----------------
arch/x86/include/asm/paravirt_types.h | 33 ++++-----
arch/x86/include/asm/pgalloc.h | 2 +-
arch/x86/include/asm/pgtable.h | 7 +-
arch/x86/include/asm/special_insns.h | 11 +--
arch/x86/kernel/asm-offsets.c | 4 +-
arch/x86/kernel/head_64.S | 4 +-
arch/x86/kernel/paravirt.c | 15 ++--
arch/x86/kernel/paravirt_patch_32.c | 4 +-
arch/x86/kernel/paravirt_patch_64.c | 4 +-
12 files changed, 103 insertions(+), 112 deletions(-)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index e203169931c7..ac80e7eadc3a 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -152,7 +152,7 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
void native_set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags);
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_XXL
static inline void __set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags)
{
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index eeeb9289c764..0ca50611e8ce 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -16,12 +16,12 @@
extern atomic64_t last_mm_ctx_id;
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_XXL
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
}
-#endif /* !CONFIG_PARAVIRT */
+#endif /* !CONFIG_PARAVIRT_XXL */
#ifdef CONFIG_PERF_EVENTS
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 512433c56c33..63ab58dc5b73 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -17,6 +17,62 @@
#include <linux/cpumask.h>
#include <asm/frame.h>
+static inline unsigned long long paravirt_sched_clock(void)
+{
+ return PVOP_CALL0(unsigned long long, time.sched_clock);
+}
+
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return PVOP_CALL1(u64, time.steal_clock, cpu);
+}
+
+/* The paravirtualized I/O functions */
+static inline void slow_down_io(void)
+{
+ pv_ops.cpu.io_delay();
+#ifdef REALLY_SLOW_IO
+ pv_ops.cpu.io_delay();
+ pv_ops.cpu.io_delay();
+ pv_ops.cpu.io_delay();
+#endif
+}
+
+static inline void __flush_tlb(void)
+{
+ PVOP_VCALL0(mmu.flush_tlb_user);
+}
+
+static inline void __flush_tlb_global(void)
+{
+ PVOP_VCALL0(mmu.flush_tlb_kernel);
+}
+
+static inline void __flush_tlb_one_user(unsigned long addr)
+{
+ PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
+}
+
+static inline void flush_tlb_others(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info)
+{
+ PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info);
+}
+
+static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ PVOP_VCALL2(mmu.tlb_remove_table, tlb, table);
+}
+
+static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
+{
+ PVOP_VCALL1(mmu.exit_mmap, mm);
+}
+
#ifdef CONFIG_PARAVIRT_XXL
static inline void load_sp0(unsigned long sp0)
{
@@ -52,7 +108,6 @@ static inline void write_cr0(unsigned long x)
{
PVOP_VCALL1(cpu.write_cr0, x);
}
-#endif
static inline unsigned long read_cr2(void)
{
@@ -74,7 +129,6 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(mmu.write_cr3, x);
}
-#ifdef CONFIG_PARAVIRT_XXL
static inline void __write_cr4(unsigned long x)
{
PVOP_VCALL1(cpu.write_cr4, x);
@@ -172,23 +226,7 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
*p = paravirt_read_msr_safe(msr, &err);
return err;
}
-#endif
-static inline unsigned long long paravirt_sched_clock(void)
-{
- return PVOP_CALL0(unsigned long long, time.sched_clock);
-}
-
-struct static_key;
-extern struct static_key paravirt_steal_enabled;
-extern struct static_key paravirt_steal_rq_enabled;
-
-static inline u64 paravirt_steal_clock(int cpu)
-{
- return PVOP_CALL1(u64, time.steal_clock, cpu);
-}
-
-#ifdef CONFIG_PARAVIRT_XXL
static inline unsigned long long paravirt_read_pmc(int counter)
{
return PVOP_CALL1(u64, cpu.read_pmc, counter);
@@ -267,18 +305,6 @@ static inline void set_iopl_mask(unsigned mask)
{
PVOP_VCALL1(cpu.set_iopl_mask, mask);
}
-#endif
-
-/* The paravirtualized I/O functions */
-static inline void slow_down_io(void)
-{
- pv_ops.cpu.io_delay();
-#ifdef REALLY_SLOW_IO
- pv_ops.cpu.io_delay();
- pv_ops.cpu.io_delay();
- pv_ops.cpu.io_delay();
-#endif
-}
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
@@ -292,35 +318,6 @@ static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
PVOP_VCALL2(mmu.dup_mmap, oldmm, mm);
}
-static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
-{
- PVOP_VCALL1(mmu.exit_mmap, mm);
-}
-
-static inline void __flush_tlb(void)
-{
- PVOP_VCALL0(mmu.flush_tlb_user);
-}
-static inline void __flush_tlb_global(void)
-{
- PVOP_VCALL0(mmu.flush_tlb_kernel);
-}
-static inline void __flush_tlb_one_user(unsigned long addr)
-{
- PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
-}
-
-static inline void flush_tlb_others(const struct cpumask *cpumask,
- const struct flush_tlb_info *info)
-{
- PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info);
-}
-
-static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- PVOP_VCALL2(mmu.tlb_remove_table, tlb, table);
-}
-
static inline int paravirt_pgd_alloc(struct mm_struct *mm)
{
return PVOP_CALL1(int, mmu.pgd_alloc, mm);
@@ -623,7 +620,6 @@ static inline void pmd_clear(pmd_t *pmdp)
}
#endif /* CONFIG_X86_PAE */
-#ifdef CONFIG_PARAVIRT_XXL
#define __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
{
@@ -634,7 +630,6 @@ static inline void arch_end_context_switch(struct task_struct *next)
{
PVOP_VCALL1(cpu.end_context_switch, next);
}
-#endif
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
@@ -657,6 +652,7 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
{
pv_ops.mmu.set_fixmap(idx, phys, flags);
}
+#endif
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
@@ -948,15 +944,20 @@ extern void default_banner(void);
#endif /* __ASSEMBLY__ */
#else /* CONFIG_PARAVIRT */
# define default_banner x86_init_noop
+#endif /* !CONFIG_PARAVIRT */
+
#ifndef __ASSEMBLY__
+#ifndef CONFIG_PARAVIRT_XXL
static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
struct mm_struct *mm)
{
}
+#endif
+#ifndef CONFIG_PARAVIRT
static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
}
+#endif
#endif /* __ASSEMBLY__ */
-#endif /* !CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 44738bf39584..f95b7228c615 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -91,13 +91,14 @@ struct pv_init_ops {
unsigned long addr, unsigned len);
} __no_randomize_layout;
-
+#ifdef CONFIG_PARAVIRT_XXL
struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
void (*enter)(void);
void (*leave)(void);
void (*flush)(void);
} __no_randomize_layout;
+#endif
struct pv_time_ops {
unsigned long long (*sched_clock)(void);
@@ -205,31 +206,30 @@ struct pv_irq_ops {
} __no_randomize_layout;
struct pv_mmu_ops {
+ /* TLB operations */
+ void (*flush_tlb_user)(void);
+ void (*flush_tlb_kernel)(void);
+ void (*flush_tlb_one_user)(unsigned long addr);
+ void (*flush_tlb_others)(const struct cpumask *cpus,
+ const struct flush_tlb_info *info);
+
+ void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
+
+ /* Hook for intercepting the destruction of an mm_struct. */
+ void (*exit_mmap)(struct mm_struct *mm);
+
+#ifdef CONFIG_PARAVIRT_XXL
unsigned long (*read_cr2)(void);
void (*write_cr2)(unsigned long);
unsigned long (*read_cr3)(void);
void (*write_cr3)(unsigned long);
- /*
- * Hooks for intercepting the creation/use/destruction of an
- * mm_struct.
- */
+ /* Hooks for intercepting the creation/use of an mm_struct. */
void (*activate_mm)(struct mm_struct *prev,
struct mm_struct *next);
void (*dup_mmap)(struct mm_struct *oldmm,
struct mm_struct *mm);
- void (*exit_mmap)(struct mm_struct *mm);
-
-
- /* TLB operations */
- void (*flush_tlb_user)(void);
- void (*flush_tlb_kernel)(void);
- void (*flush_tlb_one_user)(unsigned long addr);
- void (*flush_tlb_others)(const struct cpumask *cpus,
- const struct flush_tlb_info *info);
-
- void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
/* Hooks for allocating and freeing a pagetable top-level */
int (*pgd_alloc)(struct mm_struct *mm);
@@ -304,6 +304,7 @@ struct pv_mmu_ops {
an mfn. We can tell which is which from the index. */
void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags);
+#endif
} __no_randomize_layout;
struct arch_spinlock;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fbd578daa66e..ec7f43327033 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -8,7 +8,7 @@
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5bb0fe3b7e00..7b0489ca027a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -55,9 +55,9 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
extern pmdval_t early_pmd_flags;
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
-#else /* !CONFIG_PARAVIRT */
+#else /* !CONFIG_PARAVIRT_XXL */
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
@@ -111,9 +111,6 @@ extern pmdval_t early_pmd_flags;
#define pte_val(x) native_pte_val(x)
#define __pte(x) native_make_pte(x)
-#endif /* CONFIG_PARAVIRT */
-
-#ifndef CONFIG_PARAVIRT_XXL
#define arch_end_context_switch(prev) do {} while(0)
#endif /* CONFIG_PARAVIRT_XXL */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 2aa6ce4bf159..43c029cdc3fe 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -141,11 +141,10 @@ static inline unsigned long __read_cr4(void)
return native_read_cr4();
}
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
-#endif
+#else
-#ifndef CONFIG_PARAVIRT_XXL
static inline unsigned long read_cr0(void)
{
return native_read_cr0();
@@ -155,9 +154,7 @@ static inline void write_cr0(unsigned long x)
{
native_write_cr0(x);
}
-#endif
-#ifndef CONFIG_PARAVIRT
static inline unsigned long read_cr2(void)
{
return native_read_cr2();
@@ -181,9 +178,7 @@ static inline void write_cr3(unsigned long x)
{
native_write_cr3(x);
}
-#endif
-#ifndef CONFIG_PARAVIRT_XXL
static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
@@ -213,7 +208,7 @@ static inline void load_gs_index(unsigned selector)
#endif
-#endif/* CONFIG_PARAVIRT_XXL */
+#endif /* CONFIG_PARAVIRT_XXL */
static inline void clflush(volatile void *__p)
{
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 28e7572ff74d..fc02c3cf238f 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -64,13 +64,11 @@ void common(void) {
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
#endif
-#ifdef CONFIG_PARAVIRT
- BLANK();
#ifdef CONFIG_PARAVIRT_XXL
+ BLANK();
OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable);
OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable);
OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
-#endif
OFFSET(PV_MMU_read_cr2, paravirt_patch_template, mmu.read_cr2);
#endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a5bd72a0ee1a..827bca2c2782 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -25,14 +25,12 @@
#include <asm/export.h>
#include <asm/nospec-branch.h>
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
#else
#define GET_CR2_INTO(reg) movq %cr2, reg
-#endif
-#ifndef CONFIG_PARAVIRT_XXL
#define INTERRUPT_RETURN iretq
#endif
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index d48124be3b01..afa9a2a5acdc 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -384,11 +384,6 @@ struct paravirt_patch_template pv_ops = {
#endif
/* Mmu ops. */
- .mmu.read_cr2 = native_read_cr2,
- .mmu.write_cr2 = native_write_cr2,
- .mmu.read_cr3 = __native_read_cr3,
- .mmu.write_cr3 = native_write_cr3,
-
.mmu.flush_tlb_user = native_flush_tlb,
.mmu.flush_tlb_kernel = native_flush_tlb_global,
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
@@ -396,6 +391,14 @@ struct paravirt_patch_template pv_ops = {
.mmu.tlb_remove_table =
(void (*)(struct mmu_gather *, void *))tlb_remove_page,
+ .mmu.exit_mmap = paravirt_nop,
+
+#ifdef CONFIG_PARAVIRT_XXL
+ .mmu.read_cr2 = native_read_cr2,
+ .mmu.write_cr2 = native_write_cr2,
+ .mmu.read_cr3 = __native_read_cr3,
+ .mmu.write_cr3 = native_write_cr3,
+
.mmu.pgd_alloc = __paravirt_pgd_alloc,
.mmu.pgd_free = paravirt_nop,
@@ -448,7 +451,6 @@ struct paravirt_patch_template pv_ops = {
.mmu.make_pgd = PTE_IDENT,
.mmu.dup_mmap = paravirt_nop,
- .mmu.exit_mmap = paravirt_nop,
.mmu.activate_mm = paravirt_nop,
.mmu.lazy_mode = {
@@ -458,6 +460,7 @@ struct paravirt_patch_template pv_ops = {
},
.mmu.set_fixmap = native_set_fixmap,
+#endif
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
/* Lock ops. */
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 1d44705c6528..d460cbcabcfe 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -7,10 +7,10 @@ DEF_NATIVE(irq, irq_enable, "sti");
DEF_NATIVE(irq, restore_fl, "push %eax; popf");
DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
DEF_NATIVE(cpu, iret, "iret");
-#endif
DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
+#endif
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
@@ -49,10 +49,10 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
PATCH_SITE(irq, restore_fl);
PATCH_SITE(irq, save_fl);
PATCH_SITE(cpu, iret);
-#endif
PATCH_SITE(mmu, read_cr2);
PATCH_SITE(mmu, read_cr3);
PATCH_SITE(mmu, write_cr3);
+#endif
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(lock.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index b00937963a0f..5ad5bcda9dc6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -8,11 +8,9 @@ DEF_NATIVE(irq, irq_disable, "cli");
DEF_NATIVE(irq, irq_enable, "sti");
DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
-#endif
DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
-#ifdef CONFIG_PARAVIRT_XXL
DEF_NATIVE(cpu, wbinvd, "wbinvd");
DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
@@ -61,10 +59,10 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
PATCH_SITE(cpu, usergs_sysret64);
PATCH_SITE(cpu, swapgs);
PATCH_SITE(cpu, wbinvd);
-#endif
PATCH_SITE(mmu, read_cr2);
PATCH_SITE(mmu, read_cr3);
PATCH_SITE(mmu, write_cr3);
+#endif
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(lock.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
--
2.16.4
^ permalink raw reply related
* [PATCH v3 13/15] x86/paravirt: move the pv_irq_ops under the PARAVIRT_XXL umbrella
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
All of the paravirt ops defined in pv_irq_ops are for Xen PV guests
or VSMP only. Define them only if CONFIG_PARAVIRT_XXL is set.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V3:
- put all irq ops under XXL (Peter Zijlstra)
---
arch/x86/include/asm/irqflags.h | 8 +-------
arch/x86/include/asm/paravirt.h | 6 +++---
arch/x86/include/asm/paravirt_types.h | 3 ++-
arch/x86/kernel/asm-offsets.c | 2 +-
arch/x86/kernel/asm-offsets_64.c | 2 +-
arch/x86/kernel/paravirt.c | 2 +-
arch/x86/kernel/paravirt_patch_32.c | 4 ++--
arch/x86/kernel/paravirt_patch_64.c | 4 +++-
arch/x86/kernel/vsmp_64.c | 2 +-
9 files changed, 15 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c5e74ba2c95e..d84517184841 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -63,7 +63,7 @@ static inline __cpuidle void native_halt(void)
#endif
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#ifndef __ASSEMBLY__
@@ -125,13 +125,7 @@ static inline notrace unsigned long arch_local_irq_save(void)
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(x) pushfq; popq %rax
#endif
-#endif
-#endif /* __ASSEMBLY__ */
-#endif /* CONFIG_PARAVIRT */
-#ifndef CONFIG_PARAVIRT_XXL
-#ifdef __ASSEMBLY__
-#ifdef CONFIG_X86_64
#define SWAPGS swapgs
/*
* Currently paravirt can't handle swapgs nicely when we
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 686a68004b5f..512433c56c33 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -91,7 +91,6 @@ static inline void write_cr8(unsigned long x)
PVOP_VCALL1(cpu.write_cr8, x);
}
#endif
-#endif
static inline void arch_safe_halt(void)
{
@@ -103,7 +102,6 @@ static inline void halt(void)
PVOP_VCALL0(irq.halt);
}
-#ifdef CONFIG_PARAVIRT_XXL
static inline void wbinvd(void)
{
PVOP_VCALL0(cpu.wbinvd);
@@ -770,6 +768,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
#define __PV_IS_CALLEE_SAVE(func) \
((struct paravirt_callee_save) { func })
+#ifdef CONFIG_PARAVIRT_XXL
static inline notrace unsigned long arch_local_save_flags(void)
{
return PVOP_CALLEE0(unsigned long, irq.save_fl);
@@ -798,6 +797,7 @@ static inline notrace unsigned long arch_local_irq_save(void)
arch_local_irq_disable();
return f;
}
+#endif
/* Make sure as little as possible of this mess escapes. */
@@ -884,7 +884,6 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(PV_CPU_iret), \
ANNOTATE_RETPOLINE_SAFE; \
jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
-#endif
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \
@@ -899,6 +898,7 @@ extern void default_banner(void);
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT_XXL
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 30ee6538442c..44738bf39584 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -184,6 +184,7 @@ struct pv_cpu_ops {
} __no_randomize_layout;
struct pv_irq_ops {
+#ifdef CONFIG_PARAVIRT_XXL
/*
* Get/set interrupt state. save_fl and restore_fl are only
* expected to use X86_EFLAGS_IF; all other bits
@@ -200,7 +201,7 @@ struct pv_irq_ops {
void (*safe_halt)(void);
void (*halt)(void);
-
+#endif
} __no_randomize_layout;
struct pv_mmu_ops {
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 0fe233d98d17..28e7572ff74d 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -66,9 +66,9 @@ void common(void) {
#ifdef CONFIG_PARAVIRT
BLANK();
+#ifdef CONFIG_PARAVIRT_XXL
OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable);
OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable);
-#ifdef CONFIG_PARAVIRT_XXL
OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
#endif
OFFSET(PV_MMU_read_cr2, paravirt_patch_template, mmu.read_cr2);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 2a15d420a84d..ddced33184b5 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -25,9 +25,9 @@ int main(void)
OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template,
cpu.usergs_sysret64);
OFFSET(PV_CPU_swapgs, paravirt_patch_template, cpu.swapgs);
-#endif
#ifdef CONFIG_DEBUG_ENTRY
OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl);
+#endif
#endif
BLANK();
#endif
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index d29b46621543..d48124be3b01 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -373,7 +373,6 @@ struct paravirt_patch_template pv_ops = {
.cpu.start_context_switch = paravirt_nop,
.cpu.end_context_switch = paravirt_nop,
-#endif
/* Irq ops. */
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
@@ -382,6 +381,7 @@ struct paravirt_patch_template pv_ops = {
.irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.irq.safe_halt = native_safe_halt,
.irq.halt = native_halt,
+#endif
/* Mmu ops. */
.mmu.read_cr2 = native_read_cr2,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 5a20aa56efc0..1d44705c6528 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/paravirt.h>
+#ifdef CONFIG_PARAVIRT_XXL
DEF_NATIVE(irq, irq_disable, "cli");
DEF_NATIVE(irq, irq_enable, "sti");
DEF_NATIVE(irq, restore_fl, "push %eax; popf");
DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-#ifdef CONFIG_PARAVIRT_XXL
DEF_NATIVE(cpu, iret, "iret");
#endif
DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
@@ -43,11 +43,11 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
end = end_##ops##_##x; \
goto patch_site
switch (type) {
+#ifdef CONFIG_PARAVIRT_XXL
PATCH_SITE(irq, irq_disable);
PATCH_SITE(irq, irq_enable);
PATCH_SITE(irq, restore_fl);
PATCH_SITE(irq, save_fl);
-#ifdef CONFIG_PARAVIRT_XXL
PATCH_SITE(cpu, iret);
#endif
PATCH_SITE(mmu, read_cr2);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 461aba038ada..b00937963a0f 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -3,10 +3,12 @@
#include <asm/asm-offsets.h>
#include <linux/stringify.h>
+#ifdef CONFIG_PARAVIRT_XXL
DEF_NATIVE(irq, irq_disable, "cli");
DEF_NATIVE(irq, irq_enable, "sti");
DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
+#endif
DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
@@ -51,11 +53,11 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
end = end_##ops##_##x; \
goto patch_site
switch(type) {
+#ifdef CONFIG_PARAVIRT_XXL
PATCH_SITE(irq, restore_fl);
PATCH_SITE(irq, save_fl);
PATCH_SITE(irq, irq_enable);
PATCH_SITE(irq, irq_disable);
-#ifdef CONFIG_PARAVIRT_XXL
PATCH_SITE(cpu, usergs_sysret64);
PATCH_SITE(cpu, swapgs);
PATCH_SITE(cpu, wbinvd);
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 789918d78697..1eae5af491c2 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -26,7 +26,7 @@
#define TOPOLOGY_REGISTER_OFFSET 0x10
-#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
+#if defined CONFIG_PCI && defined CONFIG_PARAVIRT_XXL
/*
* Interrupt control on vSMPowered systems:
* ~AC is a shadow of IF. If IF is 'on' AC should be 'off'
--
2.16.4
^ permalink raw reply related
* [PATCH v3 12/15] x86/paravirt: move the Xen-only pv_cpu_ops under the PARAVIRT_XXL umbrella
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
Most of the paravirt ops defined in pv_cpu_ops are for Xen PV guests
only. Define them only if CONFIG_PARAVIRT_XXL is set. Only io_delay
remains available with plain CONFIG_PARAVIRT.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
arch/x86/include/asm/debugreg.h | 2 +-
arch/x86/include/asm/desc.h | 4 ++--
arch/x86/include/asm/irqflags.h | 16 +++++++++++-----
arch/x86/include/asm/msr.h | 4 ++--
arch/x86/include/asm/paravirt.h | 19 +++++++++++++++++--
arch/x86/include/asm/paravirt_types.h | 5 ++++-
arch/x86/include/asm/pgtable.h | 6 ++++--
arch/x86/include/asm/processor.h | 4 ++--
arch/x86/include/asm/special_insns.h | 9 +++++++--
arch/x86/kernel/asm-offsets.c | 2 ++
arch/x86/kernel/asm-offsets_64.c | 2 ++
arch/x86/kernel/cpu/common.c | 2 +-
arch/x86/kernel/head_64.S | 2 ++
arch/x86/kernel/paravirt.c | 13 ++++++++++++-
arch/x86/kernel/paravirt_patch_32.c | 4 ++++
arch/x86/kernel/paravirt_patch_64.c | 6 +++++-
16 files changed, 78 insertions(+), 22 deletions(-)
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 4505ac2735ad..9e5ca30738e5 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -8,7 +8,7 @@
DECLARE_PER_CPU(unsigned long, cpu_dr7);
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_XXL
/*
* These special macros can be used to get or set a debugging register
*/
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 13c5ee878a47..68a99d2a5f33 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -108,7 +108,7 @@ static inline int desc_empty(const void *ptr)
return !(desc[0] | desc[1]);
}
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#define load_TR_desc() native_load_tr_desc()
@@ -134,7 +134,7 @@ static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
}
-#endif /* CONFIG_PARAVIRT */
+#endif /* CONFIG_PARAVIRT_XXL */
#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 250bcd40d769..c5e74ba2c95e 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -121,6 +121,16 @@ static inline notrace unsigned long arch_local_irq_save(void)
#define ENABLE_INTERRUPTS(x) sti
#define DISABLE_INTERRUPTS(x) cli
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x) pushfq; popq %rax
+#endif
+#endif
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+
+#ifndef CONFIG_PARAVIRT_XXL
+#ifdef __ASSEMBLY__
#ifdef CONFIG_X86_64
#define SWAPGS swapgs
/*
@@ -142,16 +152,12 @@ static inline notrace unsigned long arch_local_irq_save(void)
swapgs; \
sysretl
-#ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(x) pushfq; popq %rax
-#endif
#else
#define INTERRUPT_RETURN iret
#endif
-
#endif /* __ASSEMBLY__ */
-#endif /* CONFIG_PARAVIRT */
+#endif /* CONFIG_PARAVIRT_XXL */
#ifndef __ASSEMBLY__
static inline int arch_irqs_disabled_flags(unsigned long flags)
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 04addd6e0a4a..91e4cf189914 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -242,7 +242,7 @@ static inline unsigned long long native_read_pmc(int counter)
return EAX_EDX_VAL(val, low, high);
}
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#include <linux/errno.h>
@@ -305,7 +305,7 @@ do { \
#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
-#endif /* !CONFIG_PARAVIRT */
+#endif /* !CONFIG_PARAVIRT_XXL */
/*
* 64-bit version of wrmsr_safe():
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index dbaedf1ed622..686a68004b5f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -17,6 +17,7 @@
#include <linux/cpumask.h>
#include <asm/frame.h>
+#ifdef CONFIG_PARAVIRT_XXL
static inline void load_sp0(unsigned long sp0)
{
PVOP_VCALL1(cpu.load_sp0, sp0);
@@ -51,6 +52,7 @@ static inline void write_cr0(unsigned long x)
{
PVOP_VCALL1(cpu.write_cr0, x);
}
+#endif
static inline unsigned long read_cr2(void)
{
@@ -72,6 +74,7 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(mmu.write_cr3, x);
}
+#ifdef CONFIG_PARAVIRT_XXL
static inline void __write_cr4(unsigned long x)
{
PVOP_VCALL1(cpu.write_cr4, x);
@@ -88,6 +91,7 @@ static inline void write_cr8(unsigned long x)
PVOP_VCALL1(cpu.write_cr8, x);
}
#endif
+#endif
static inline void arch_safe_halt(void)
{
@@ -99,14 +103,13 @@ static inline void halt(void)
PVOP_VCALL0(irq.halt);
}
+#ifdef CONFIG_PARAVIRT_XXL
static inline void wbinvd(void)
{
PVOP_VCALL0(cpu.wbinvd);
}
-#ifdef CONFIG_PARAVIRT_XXL
#define get_kernel_rpl() (pv_info.kernel_rpl)
-#endif
static inline u64 paravirt_read_msr(unsigned msr)
{
@@ -171,6 +174,7 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
*p = paravirt_read_msr_safe(msr, &err);
return err;
}
+#endif
static inline unsigned long long paravirt_sched_clock(void)
{
@@ -186,6 +190,7 @@ static inline u64 paravirt_steal_clock(int cpu)
return PVOP_CALL1(u64, time.steal_clock, cpu);
}
+#ifdef CONFIG_PARAVIRT_XXL
static inline unsigned long long paravirt_read_pmc(int counter)
{
return PVOP_CALL1(u64, cpu.read_pmc, counter);
@@ -230,6 +235,7 @@ static inline unsigned long paravirt_store_tr(void)
{
return PVOP_CALL0(unsigned long, cpu.store_tr);
}
+
#define store_tr(tr) ((tr) = paravirt_store_tr())
static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{
@@ -263,6 +269,7 @@ static inline void set_iopl_mask(unsigned mask)
{
PVOP_VCALL1(cpu.set_iopl_mask, mask);
}
+#endif
/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
@@ -618,6 +625,7 @@ static inline void pmd_clear(pmd_t *pmdp)
}
#endif /* CONFIG_X86_PAE */
+#ifdef CONFIG_PARAVIRT_XXL
#define __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
{
@@ -628,6 +636,7 @@ static inline void arch_end_context_switch(struct task_struct *next)
{
PVOP_VCALL1(cpu.end_context_switch, next);
}
+#endif
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
@@ -870,10 +879,12 @@ extern void default_banner(void);
#define PARA_INDIRECT(addr) *%cs:addr
#endif
+#ifdef CONFIG_PARAVIRT_XXL
#define INTERRUPT_RETURN \
PARA_SITE(PARA_PATCH(PV_CPU_iret), \
ANNOTATE_RETPOLINE_SAFE; \
jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
+#endif
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \
@@ -890,6 +901,7 @@ extern void default_banner(void);
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT_XXL
/*
* If swapgs is used while the userspace stack is still current,
* there's no way to call a pvop. The PV replacement *must* be
@@ -909,15 +921,18 @@ extern void default_banner(void);
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \
)
+#endif
#define GET_CR2_INTO_RAX \
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
+#ifdef CONFIG_PARAVIRT_XXL
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \
ANNOTATE_RETPOLINE_SAFE; \
jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);)
+#endif
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(clobbers) \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 98e1110354b8..30ee6538442c 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -106,6 +106,9 @@ struct pv_time_ops {
struct pv_cpu_ops {
/* hooks for various privileged instructions */
+ void (*io_delay)(void);
+
+#ifdef CONFIG_PARAVIRT_XXL
unsigned long (*get_debugreg)(int regno);
void (*set_debugreg)(int regno, unsigned long value);
@@ -143,7 +146,6 @@ struct pv_cpu_ops {
void (*set_iopl_mask)(unsigned mask);
void (*wbinvd)(void);
- void (*io_delay)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
void (*cpuid)(unsigned int *eax, unsigned int *ebx,
@@ -178,6 +180,7 @@ struct pv_cpu_ops {
void (*start_context_switch)(struct task_struct *prev);
void (*end_context_switch)(struct task_struct *next);
+#endif
} __no_randomize_layout;
struct pv_irq_ops {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e4ffa565a69f..5bb0fe3b7e00 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -111,10 +111,12 @@ extern pmdval_t early_pmd_flags;
#define pte_val(x) native_pte_val(x)
#define __pte(x) native_make_pte(x)
-#define arch_end_context_switch(prev) do {} while(0)
-
#endif /* CONFIG_PARAVIRT */
+#ifndef CONFIG_PARAVIRT_XXL
+#define arch_end_context_switch(prev) do {} while(0)
+#endif /* CONFIG_PARAVIRT_XXL */
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c24297268ebc..a6220620dcf8 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -576,7 +576,7 @@ static inline bool on_thread_stack(void)
current_stack_pointer) < THREAD_SIZE;
}
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#define __cpuid native_cpuid
@@ -587,7 +587,7 @@ static inline void load_sp0(unsigned long sp0)
}
#define set_iopl_mask native_set_iopl_mask
-#endif /* CONFIG_PARAVIRT */
+#endif /* CONFIG_PARAVIRT_XXL */
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 317fc59b512c..2aa6ce4bf159 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -143,8 +143,9 @@ static inline unsigned long __read_cr4(void)
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
-#else
+#endif
+#ifndef CONFIG_PARAVIRT_XXL
static inline unsigned long read_cr0(void)
{
return native_read_cr0();
@@ -154,7 +155,9 @@ static inline void write_cr0(unsigned long x)
{
native_write_cr0(x);
}
+#endif
+#ifndef CONFIG_PARAVIRT
static inline unsigned long read_cr2(void)
{
return native_read_cr2();
@@ -178,7 +181,9 @@ static inline void write_cr3(unsigned long x)
{
native_write_cr3(x);
}
+#endif
+#ifndef CONFIG_PARAVIRT_XXL
static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
@@ -208,7 +213,7 @@ static inline void load_gs_index(unsigned selector)
#endif
-#endif/* CONFIG_PARAVIRT */
+#endif/* CONFIG_PARAVIRT_XXL */
static inline void clflush(volatile void *__p)
{
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 37e323f3d8c9..0fe233d98d17 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -68,7 +68,9 @@ void common(void) {
BLANK();
OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable);
OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable);
+#ifdef CONFIG_PARAVIRT_XXL
OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
+#endif
OFFSET(PV_MMU_read_cr2, paravirt_patch_template, mmu.read_cr2);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 3384b03e717f..2a15d420a84d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -21,9 +21,11 @@ static char syscalls_ia32[] = {
int main(void)
{
#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template,
cpu.usergs_sysret64);
OFFSET(PV_CPU_swapgs, paravirt_patch_template, cpu.swapgs);
+#endif
#ifdef CONFIG_DEBUG_ENTRY
OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl);
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1f089e1c79d3..ffe6904eca1c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1239,7 +1239,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
* ESPFIX issue, we can change this.
*/
#ifdef CONFIG_X86_32
-# ifdef CONFIG_PARAVIRT
+# ifdef CONFIG_PARAVIRT_XXL
do {
extern void native_iret(void);
if (pv_ops.cpu.iret == native_iret)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 15ebc2fc166e..a5bd72a0ee1a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -31,6 +31,8 @@
#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
#else
#define GET_CR2_INTO(reg) movq %cr2, reg
+#endif
+#ifndef CONFIG_PARAVIRT_XXL
#define INTERRUPT_RETURN iretq
#endif
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ee7da9ec3ea2..d29b46621543 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -101,6 +101,7 @@ static unsigned paravirt_patch_call(void *insnbuf, const void *target,
return 5;
}
+#ifdef CONFIG_PARAVIRT_XXL
static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
unsigned long addr, unsigned len)
{
@@ -119,6 +120,7 @@ static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
return 5;
}
+#endif
DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
@@ -150,10 +152,12 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf,
else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insnbuf, len);
+#ifdef CONFIG_PARAVIRT_XXL
else if (type == PARAVIRT_PATCH(cpu.iret) ||
type == PARAVIRT_PATCH(cpu.usergs_sysret64))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
+#endif
else
/* Otherwise call the function. */
ret = paravirt_patch_call(insnbuf, opfunc, addr, len);
@@ -262,6 +266,7 @@ void paravirt_flush_lazy_mmu(void)
preempt_enable();
}
+#ifdef CONFIG_PARAVIRT_XXL
void paravirt_start_context_switch(struct task_struct *prev)
{
BUG_ON(preemptible());
@@ -282,6 +287,7 @@ void paravirt_end_context_switch(struct task_struct *next)
if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
arch_enter_lazy_mmu_mode();
}
+#endif
enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
@@ -320,6 +326,9 @@ struct paravirt_patch_template pv_ops = {
.time.steal_clock = native_steal_clock,
/* Cpu ops. */
+ .cpu.io_delay = native_io_delay,
+
+#ifdef CONFIG_PARAVIRT_XXL
.cpu.cpuid = native_cpuid,
.cpu.get_debugreg = native_get_debugreg,
.cpu.set_debugreg = native_set_debugreg,
@@ -361,10 +370,10 @@ struct paravirt_patch_template pv_ops = {
.cpu.swapgs = native_swapgs,
.cpu.set_iopl_mask = native_set_iopl_mask,
- .cpu.io_delay = native_io_delay,
.cpu.start_context_switch = paravirt_nop,
.cpu.end_context_switch = paravirt_nop,
+#endif
/* Irq ops. */
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
@@ -463,10 +472,12 @@ struct paravirt_patch_template pv_ops = {
#endif
};
+#ifdef CONFIG_PARAVIRT_XXL
/* At this point, native_get/set_debugreg has real function entries */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
+#endif
EXPORT_SYMBOL_GPL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info);
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 026fa43e9261..5a20aa56efc0 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -5,7 +5,9 @@ DEF_NATIVE(irq, irq_disable, "cli");
DEF_NATIVE(irq, irq_enable, "sti");
DEF_NATIVE(irq, restore_fl, "push %eax; popf");
DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
+#ifdef CONFIG_PARAVIRT_XXL
DEF_NATIVE(cpu, iret, "iret");
+#endif
DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
@@ -45,7 +47,9 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
PATCH_SITE(irq, irq_enable);
PATCH_SITE(irq, restore_fl);
PATCH_SITE(irq, save_fl);
+#ifdef CONFIG_PARAVIRT_XXL
PATCH_SITE(cpu, iret);
+#endif
PATCH_SITE(mmu, read_cr2);
PATCH_SITE(mmu, read_cr3);
PATCH_SITE(mmu, write_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 582e893728e8..461aba038ada 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,10 +10,12 @@ DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
+#ifdef CONFIG_PARAVIRT_XXL
DEF_NATIVE(cpu, wbinvd, "wbinvd");
DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
DEF_NATIVE(cpu, swapgs, "swapgs");
+#endif
DEF_NATIVE(, mov32, "mov %edi, %eax");
DEF_NATIVE(, mov64, "mov %rdi, %rax");
@@ -53,12 +55,14 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
PATCH_SITE(irq, save_fl);
PATCH_SITE(irq, irq_enable);
PATCH_SITE(irq, irq_disable);
+#ifdef CONFIG_PARAVIRT_XXL
PATCH_SITE(cpu, usergs_sysret64);
PATCH_SITE(cpu, swapgs);
+ PATCH_SITE(cpu, wbinvd);
+#endif
PATCH_SITE(mmu, read_cr2);
PATCH_SITE(mmu, read_cr3);
PATCH_SITE(mmu, write_cr3);
- PATCH_SITE(cpu, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(lock.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
--
2.16.4
^ permalink raw reply related
* [PATCH v3 11/15] x86/paravirt: move items in pv_info under PARAVIRT_XXL umbrella
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
All items in pv_info except name are needed by Xen PV only. Define them
only if CONFIG_PARAVIRT_XXL is set.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
arch/x86/include/asm/paravirt.h | 2 ++
arch/x86/include/asm/paravirt_types.h | 2 ++
arch/x86/include/asm/pgtable-3level_types.h | 2 +-
arch/x86/include/asm/ptrace.h | 2 +-
arch/x86/include/asm/segment.h | 2 +-
arch/x86/kernel/paravirt.c | 2 ++
6 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2ba23e98c5cd..dbaedf1ed622 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -104,7 +104,9 @@ static inline void wbinvd(void)
PVOP_VCALL0(cpu.wbinvd);
}
+#ifdef CONFIG_PARAVIRT_XXL
#define get_kernel_rpl() (pv_info.kernel_rpl)
+#endif
static inline u64 paravirt_read_msr(unsigned msr)
{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 462a54b2bb53..98e1110354b8 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -66,11 +66,13 @@ struct paravirt_callee_save {
/* general info */
struct pv_info {
+#ifdef CONFIG_PARAVIRT_XXL
unsigned int kernel_rpl;
int shared_kernel_pmd;
#ifdef CONFIG_X86_64
u16 extra_user_64bit_cs; /* __USER_CS if none */
+#endif
#endif
const char *name;
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 858358a82b14..33845d36897c 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -20,7 +20,7 @@ typedef union {
} pte_t;
#endif /* !__ASSEMBLY__ */
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \
(pv_info.shared_kernel_pmd)))
#else
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 6de1fd3d0097..250be50aa257 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -144,7 +144,7 @@ static inline int v8086_mode(struct pt_regs *regs)
static inline bool user_64bit_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_64
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_XXL
/*
* On non-paravirt systems, this is the only long mode CPL 3
* selector. We do not allow long mode selectors in the LDT.
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index e293c122d0d5..0ffbe9519e68 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -211,7 +211,7 @@
#endif
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_XXL
# define get_kernel_rpl() 0
#endif
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 5fc2b94e1591..ee7da9ec3ea2 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -293,12 +293,14 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
struct pv_info pv_info = {
.name = "bare hardware",
+#ifdef CONFIG_PARAVIRT_XXL
.kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
#ifdef CONFIG_X86_64
.extra_user_64bit_cs = __USER_CS,
#endif
+#endif
};
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
--
2.16.4
^ permalink raw reply related
* [PATCH v3 10/15] x86/paravirt: introduce new config option PARAVIRT_XXL
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
A large number of paravirt ops are used by Xen PV guests only. Add a new
config option PARAVIRT_XXL which is selected by XEN_PV. Later we can
put the Xen-PV-only paravirt ops under the PARAVIRT_XXL umbrella.
Since irq-related paravirt ops are used only by VSMP and Xen PV, let
VSMP select PARAVIRT_XXL, too, in order to enable moving the irq ops
under PARAVIRT_XXL.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V3:
- let VSMP select PARAVIRT_XXL (Peter Zijlstra)
---
arch/x86/Kconfig | 4 ++++
arch/x86/boot/compressed/misc.h | 1 +
arch/x86/mm/mem_encrypt_identity.c | 1 +
arch/x86/xen/Kconfig | 1 +
4 files changed, 7 insertions(+)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c5ff296bc5d1..61dfdb694483 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -523,6 +523,7 @@ config X86_VSMP
bool "ScaleMP vSMP"
select HYPERVISOR_GUEST
select PARAVIRT
+ select PARAVIRT_XXL
depends on X86_64 && PCI
depends on X86_EXTENDED_PLATFORM
depends on SMP
@@ -754,6 +755,9 @@ config PARAVIRT
over full virtualization. However, when run without a hypervisor
the kernel is theoretically slower and slightly larger.
+config PARAVIRT_XXL
+ bool
+
config PARAVIRT_DEBUG
bool "paravirt-ops debugging"
depends on PARAVIRT && DEBUG_KERNEL
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index a423bdb42686..a1d5918765f3 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -9,6 +9,7 @@
* paravirt and debugging variants are added.)
*/
#undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_XXL
#undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_KASAN
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 7ae36868aed2..a19ef1a416ff 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -27,6 +27,7 @@
* be extended when new paravirt and debugging variants are added.)
*/
#undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_XXL
#undef CONFIG_PARAVIRT_SPINLOCKS
#include <linux/kernel.h>
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index c1f98f32c45f..dd92d7bd3613 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -18,6 +18,7 @@ config XEN_PV
bool "Xen PV guest support"
default y
depends on XEN
+ select PARAVIRT_XXL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
help
--
2.16.4
^ permalink raw reply related
* [PATCH v3 09/15] x86/paravirt: remove unused paravirt bits
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
The macros ENABLE_INTERRUPTS_SYSEXIT, GET_CR0_INTO_EAX and
PARAVIRT_ADJUST_EXCEPTION_FRAME are used nowhere. Remove their
definitions.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
arch/x86/include/asm/irqflags.h | 4 ----
arch/x86/include/asm/paravirt.h | 9 +--------
arch/x86/kernel/asm-offsets.c | 1 -
3 files changed, 1 insertion(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c14f2a74b2be..250bcd40d769 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -134,8 +134,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
*/
#define SWAPGS_UNSAFE_STACK swapgs
-#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */
-
#define INTERRUPT_RETURN jmp native_iret
#define USERGS_SYSRET64 \
swapgs; \
@@ -149,8 +147,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
#endif
#else
#define INTERRUPT_RETURN iret
-#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
-#define GET_CR0_INTO_EAX movl %cr0, %eax
#endif
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a9189b81c7dd..2ba23e98c5cd 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -887,14 +887,7 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-#ifdef CONFIG_X86_32
-#define GET_CR0_INTO_EAX \
- push %ecx; push %edx; \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_CPU_read_cr0); \
- pop %edx; pop %ecx
-#else /* !CONFIG_X86_32 */
-
+#ifdef CONFIG_X86_64
/*
* If swapgs is used while the userspace stack is still current,
* there's no way to call a pvop. The PV replacement *must* be
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 46d14498e214..37e323f3d8c9 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -69,7 +69,6 @@ void common(void) {
OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable);
OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable);
OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
- OFFSET(PV_CPU_read_cr0, paravirt_patch_template, cpu.read_cr0);
OFFSET(PV_MMU_read_cr2, paravirt_patch_template, mmu.read_cr2);
#endif
--
2.16.4
^ permalink raw reply related
* [PATCH v3 08/15] x86/paravirt: use a single ops structure
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
Instead of using six globally visible paravirt ops structures, combine
them into a single structure, keeping the original structures as
sub-structures.
This avoids the need to assemble struct paravirt_patch_template at
runtime on the stack each time apply_paravirt() is called (i.e.
when loading a module).
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- shorten pv_ops sub-structure names (Jan Beulich)
V3:
- rebase to master
- fix build failure on ARM
---
arch/arm/include/asm/paravirt.h | 9 +-
arch/arm/kernel/paravirt.c | 4 +-
arch/arm64/include/asm/paravirt.h | 9 +-
arch/arm64/kernel/paravirt.c | 4 +-
arch/x86/hyperv/mmu.c | 4 +-
arch/x86/include/asm/paravirt.h | 275 +++++++++++++++------------------
arch/x86/include/asm/paravirt_types.h | 25 ++-
arch/x86/kernel/alternative.c | 2 +-
arch/x86/kernel/asm-offsets.c | 12 +-
arch/x86/kernel/asm-offsets_64.c | 7 +-
arch/x86/kernel/cpu/common.c | 2 +-
arch/x86/kernel/cpu/vmware.c | 4 +-
arch/x86/kernel/kvm.c | 19 +--
arch/x86/kernel/kvmclock.c | 4 +-
arch/x86/kernel/paravirt-spinlocks.c | 15 +-
arch/x86/kernel/paravirt.c | 283 ++++++++++++++++------------------
arch/x86/kernel/paravirt_patch_32.c | 48 +++---
arch/x86/kernel/paravirt_patch_64.c | 56 +++----
arch/x86/kernel/tsc.c | 2 +-
arch/x86/kernel/vsmp_64.c | 18 +--
arch/x86/xen/enlighten_pv.c | 31 ++--
arch/x86/xen/irq.c | 2 +-
arch/x86/xen/mmu_hvm.c | 2 +-
arch/x86/xen/mmu_pv.c | 28 ++--
arch/x86/xen/spinlock.c | 11 +-
arch/x86/xen/time.c | 4 +-
drivers/xen/time.c | 2 +-
27 files changed, 427 insertions(+), 455 deletions(-)
diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
index d51e5cd31d01..cdbf02d9c1d4 100644
--- a/arch/arm/include/asm/paravirt.h
+++ b/arch/arm/include/asm/paravirt.h
@@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops {
unsigned long long (*steal_clock)(int cpu);
};
-extern struct pv_time_ops pv_time_ops;
+
+struct paravirt_patch_template {
+ struct pv_time_ops time;
+};
+
+extern struct paravirt_patch_template pv_ops;
static inline u64 paravirt_steal_clock(int cpu)
{
- return pv_time_ops.steal_clock(cpu);
+ return pv_ops.time.steal_clock(cpu);
}
#endif
diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c
index 53f371ed4568..75c158b0353f 100644
--- a/arch/arm/kernel/paravirt.c
+++ b/arch/arm/kernel/paravirt.c
@@ -21,5 +21,5 @@
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
-struct pv_time_ops pv_time_ops;
-EXPORT_SYMBOL_GPL(pv_time_ops);
+struct paravirt_patch_template pv_ops;
+EXPORT_SYMBOL_GPL(pv_ops);
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index bb5dcea42003..799d9dd6f7cc 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops {
unsigned long long (*steal_clock)(int cpu);
};
-extern struct pv_time_ops pv_time_ops;
+
+struct paravirt_patch_template {
+ struct pv_time_ops time;
+};
+
+extern struct paravirt_patch_template pv_ops;
static inline u64 paravirt_steal_clock(int cpu)
{
- return pv_time_ops.steal_clock(cpu);
+ return pv_ops.time.steal_clock(cpu);
}
#endif
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 53f371ed4568..75c158b0353f 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -21,5 +21,5 @@
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
-struct pv_time_ops pv_time_ops;
-EXPORT_SYMBOL_GPL(pv_time_ops);
+struct paravirt_patch_template pv_ops;
+EXPORT_SYMBOL_GPL(pv_ops);
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index ef5f29f913d7..e65d7fe6489f 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -231,6 +231,6 @@ void hyperv_setup_mmu_ops(void)
return;
pr_info("Using hypercall for remote TLB flush\n");
- pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
- pv_mmu_ops.tlb_remove_table = tlb_remove_table;
+ pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
+ pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index e1364cb40ce5..a9189b81c7dd 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -19,14 +19,14 @@
static inline void load_sp0(unsigned long sp0)
{
- PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
+ PVOP_VCALL1(cpu.load_sp0, sp0);
}
/* The paravirtualized CPUID instruction. */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
- PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
+ PVOP_VCALL4(cpu.cpuid, eax, ebx, ecx, edx);
}
/*
@@ -34,98 +34,98 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
*/
static inline unsigned long paravirt_get_debugreg(int reg)
{
- return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
+ return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
static inline void set_debugreg(unsigned long val, int reg)
{
- PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
+ PVOP_VCALL2(cpu.set_debugreg, reg, val);
}
static inline unsigned long read_cr0(void)
{
- return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
+ return PVOP_CALL0(unsigned long, cpu.read_cr0);
}
static inline void write_cr0(unsigned long x)
{
- PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
+ PVOP_VCALL1(cpu.write_cr0, x);
}
static inline unsigned long read_cr2(void)
{
- return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
+ return PVOP_CALL0(unsigned long, mmu.read_cr2);
}
static inline void write_cr2(unsigned long x)
{
- PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
+ PVOP_VCALL1(mmu.write_cr2, x);
}
static inline unsigned long __read_cr3(void)
{
- return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
+ return PVOP_CALL0(unsigned long, mmu.read_cr3);
}
static inline void write_cr3(unsigned long x)
{
- PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
+ PVOP_VCALL1(mmu.write_cr3, x);
}
static inline void __write_cr4(unsigned long x)
{
- PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
+ PVOP_VCALL1(cpu.write_cr4, x);
}
#ifdef CONFIG_X86_64
static inline unsigned long read_cr8(void)
{
- return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
+ return PVOP_CALL0(unsigned long, cpu.read_cr8);
}
static inline void write_cr8(unsigned long x)
{
- PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
+ PVOP_VCALL1(cpu.write_cr8, x);
}
#endif
static inline void arch_safe_halt(void)
{
- PVOP_VCALL0(pv_irq_ops.safe_halt);
+ PVOP_VCALL0(irq.safe_halt);
}
static inline void halt(void)
{
- PVOP_VCALL0(pv_irq_ops.halt);
+ PVOP_VCALL0(irq.halt);
}
static inline void wbinvd(void)
{
- PVOP_VCALL0(pv_cpu_ops.wbinvd);
+ PVOP_VCALL0(cpu.wbinvd);
}
#define get_kernel_rpl() (pv_info.kernel_rpl)
static inline u64 paravirt_read_msr(unsigned msr)
{
- return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr);
+ return PVOP_CALL1(u64, cpu.read_msr, msr);
}
static inline void paravirt_write_msr(unsigned msr,
unsigned low, unsigned high)
{
- PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
+ PVOP_VCALL3(cpu.write_msr, msr, low, high);
}
static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
{
- return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err);
+ return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err);
}
static inline int paravirt_write_msr_safe(unsigned msr,
unsigned low, unsigned high)
{
- return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high);
+ return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high);
}
#define rdmsr(msr, val1, val2) \
@@ -172,7 +172,7 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
static inline unsigned long long paravirt_sched_clock(void)
{
- return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
+ return PVOP_CALL0(unsigned long long, time.sched_clock);
}
struct static_key;
@@ -181,12 +181,12 @@ extern struct static_key paravirt_steal_rq_enabled;
static inline u64 paravirt_steal_clock(int cpu)
{
- return PVOP_CALL1(u64, pv_time_ops.steal_clock, cpu);
+ return PVOP_CALL1(u64, time.steal_clock, cpu);
}
static inline unsigned long long paravirt_read_pmc(int counter)
{
- return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
+ return PVOP_CALL1(u64, cpu.read_pmc, counter);
}
#define rdpmc(counter, low, high) \
@@ -200,166 +200,166 @@ do { \
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
- PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
+ PVOP_VCALL2(cpu.alloc_ldt, ldt, entries);
}
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
- PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
+ PVOP_VCALL2(cpu.free_ldt, ldt, entries);
}
static inline void load_TR_desc(void)
{
- PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
+ PVOP_VCALL0(cpu.load_tr_desc);
}
static inline void load_gdt(const struct desc_ptr *dtr)
{
- PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
+ PVOP_VCALL1(cpu.load_gdt, dtr);
}
static inline void load_idt(const struct desc_ptr *dtr)
{
- PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
+ PVOP_VCALL1(cpu.load_idt, dtr);
}
static inline void set_ldt(const void *addr, unsigned entries)
{
- PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
+ PVOP_VCALL2(cpu.set_ldt, addr, entries);
}
static inline unsigned long paravirt_store_tr(void)
{
- return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
+ return PVOP_CALL0(unsigned long, cpu.store_tr);
}
#define store_tr(tr) ((tr) = paravirt_store_tr())
static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{
- PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
+ PVOP_VCALL2(cpu.load_tls, t, cpu);
}
#ifdef CONFIG_X86_64
static inline void load_gs_index(unsigned int gs)
{
- PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
+ PVOP_VCALL1(cpu.load_gs_index, gs);
}
#endif
static inline void write_ldt_entry(struct desc_struct *dt, int entry,
const void *desc)
{
- PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc);
+ PVOP_VCALL3(cpu.write_ldt_entry, dt, entry, desc);
}
static inline void write_gdt_entry(struct desc_struct *dt, int entry,
void *desc, int type)
{
- PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type);
+ PVOP_VCALL4(cpu.write_gdt_entry, dt, entry, desc, type);
}
static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
- PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
+ PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
}
static inline void set_iopl_mask(unsigned mask)
{
- PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
+ PVOP_VCALL1(cpu.set_iopl_mask, mask);
}
/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
- pv_cpu_ops.io_delay();
+ pv_ops.cpu.io_delay();
#ifdef REALLY_SLOW_IO
- pv_cpu_ops.io_delay();
- pv_cpu_ops.io_delay();
- pv_cpu_ops.io_delay();
+ pv_ops.cpu.io_delay();
+ pv_ops.cpu.io_delay();
+ pv_ops.cpu.io_delay();
#endif
}
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
- PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
+ PVOP_VCALL2(mmu.activate_mm, prev, next);
}
static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
struct mm_struct *mm)
{
- PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
+ PVOP_VCALL2(mmu.dup_mmap, oldmm, mm);
}
static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
- PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
+ PVOP_VCALL1(mmu.exit_mmap, mm);
}
static inline void __flush_tlb(void)
{
- PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
+ PVOP_VCALL0(mmu.flush_tlb_user);
}
static inline void __flush_tlb_global(void)
{
- PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
+ PVOP_VCALL0(mmu.flush_tlb_kernel);
}
static inline void __flush_tlb_one_user(unsigned long addr)
{
- PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr);
+ PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
}
static inline void flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info)
{
- PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info);
+ PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info);
}
static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
{
- PVOP_VCALL2(pv_mmu_ops.tlb_remove_table, tlb, table);
+ PVOP_VCALL2(mmu.tlb_remove_table, tlb, table);
}
static inline int paravirt_pgd_alloc(struct mm_struct *mm)
{
- return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
+ return PVOP_CALL1(int, mmu.pgd_alloc, mm);
}
static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
+ PVOP_VCALL2(mmu.pgd_free, mm, pgd);
}
static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)
{
- PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
+ PVOP_VCALL2(mmu.alloc_pte, mm, pfn);
}
static inline void paravirt_release_pte(unsigned long pfn)
{
- PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
+ PVOP_VCALL1(mmu.release_pte, pfn);
}
static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
{
- PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
+ PVOP_VCALL2(mmu.alloc_pmd, mm, pfn);
}
static inline void paravirt_release_pmd(unsigned long pfn)
{
- PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
+ PVOP_VCALL1(mmu.release_pmd, pfn);
}
static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
- PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
+ PVOP_VCALL2(mmu.alloc_pud, mm, pfn);
}
static inline void paravirt_release_pud(unsigned long pfn)
{
- PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
+ PVOP_VCALL1(mmu.release_pud, pfn);
}
static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn)
{
- PVOP_VCALL2(pv_mmu_ops.alloc_p4d, mm, pfn);
+ PVOP_VCALL2(mmu.alloc_p4d, mm, pfn);
}
static inline void paravirt_release_p4d(unsigned long pfn)
{
- PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
+ PVOP_VCALL1(mmu.release_p4d, pfn);
}
static inline pte_t __pte(pteval_t val)
@@ -367,13 +367,9 @@ static inline pte_t __pte(pteval_t val)
pteval_t ret;
if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pteval_t,
- pv_mmu_ops.make_pte,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pteval_t, mmu.make_pte, val, (u64)val >> 32);
else
- ret = PVOP_CALLEE1(pteval_t,
- pv_mmu_ops.make_pte,
- val);
+ ret = PVOP_CALLEE1(pteval_t, mmu.make_pte, val);
return (pte_t) { .pte = ret };
}
@@ -383,11 +379,10 @@ static inline pteval_t pte_val(pte_t pte)
pteval_t ret;
if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val,
+ ret = PVOP_CALLEE2(pteval_t, mmu.pte_val,
pte.pte, (u64)pte.pte >> 32);
else
- ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val,
- pte.pte);
+ ret = PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
return ret;
}
@@ -397,11 +392,9 @@ static inline pgd_t __pgd(pgdval_t val)
pgdval_t ret;
if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pgdval_t, mmu.make_pgd, val, (u64)val >> 32);
else
- ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd,
- val);
+ ret = PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val);
return (pgd_t) { ret };
}
@@ -411,11 +404,10 @@ static inline pgdval_t pgd_val(pgd_t pgd)
pgdval_t ret;
if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val,
+ ret = PVOP_CALLEE2(pgdval_t, mmu.pgd_val,
pgd.pgd, (u64)pgd.pgd >> 32);
else
- ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val,
- pgd.pgd);
+ ret = PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
return ret;
}
@@ -426,8 +418,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long a
{
pteval_t ret;
- ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
- mm, addr, ptep);
+ ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, mm, addr, ptep);
return (pte_t) { .pte = ret };
}
@@ -437,20 +428,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long a
{
if (sizeof(pteval_t) > sizeof(long))
/* 5 arg words */
- pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
+ pv_ops.mmu.ptep_modify_prot_commit(mm, addr, ptep, pte);
else
- PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
+ PVOP_VCALL4(mmu.ptep_modify_prot_commit,
mm, addr, ptep, pte.pte);
}
static inline void set_pte(pte_t *ptep, pte_t pte)
{
if (sizeof(pteval_t) > sizeof(long))
- PVOP_VCALL3(pv_mmu_ops.set_pte, ptep,
- pte.pte, (u64)pte.pte >> 32);
+ PVOP_VCALL3(mmu.set_pte, ptep, pte.pte, (u64)pte.pte >> 32);
else
- PVOP_VCALL2(pv_mmu_ops.set_pte, ptep,
- pte.pte);
+ PVOP_VCALL2(mmu.set_pte, ptep, pte.pte);
}
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -458,9 +447,9 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
{
if (sizeof(pteval_t) > sizeof(long))
/* 5 arg words */
- pv_mmu_ops.set_pte_at(mm, addr, ptep, pte);
+ pv_ops.mmu.set_pte_at(mm, addr, ptep, pte);
else
- PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
+ PVOP_VCALL4(mmu.set_pte_at, mm, addr, ptep, pte.pte);
}
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
@@ -468,9 +457,9 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
pmdval_t val = native_pmd_val(pmd);
if (sizeof(pmdval_t) > sizeof(long))
- PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32);
+ PVOP_VCALL3(mmu.set_pmd, pmdp, val, (u64)val >> 32);
else
- PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
+ PVOP_VCALL2(mmu.set_pmd, pmdp, val);
}
#if CONFIG_PGTABLE_LEVELS >= 3
@@ -479,11 +468,9 @@ static inline pmd_t __pmd(pmdval_t val)
pmdval_t ret;
if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pmdval_t, mmu.make_pmd, val, (u64)val >> 32);
else
- ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd,
- val);
+ ret = PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val);
return (pmd_t) { ret };
}
@@ -493,11 +480,10 @@ static inline pmdval_t pmd_val(pmd_t pmd)
pmdval_t ret;
if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val,
+ ret = PVOP_CALLEE2(pmdval_t, mmu.pmd_val,
pmd.pmd, (u64)pmd.pmd >> 32);
else
- ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val,
- pmd.pmd);
+ ret = PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd);
return ret;
}
@@ -507,11 +493,9 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
pudval_t val = native_pud_val(pud);
if (sizeof(pudval_t) > sizeof(long))
- PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
- val, (u64)val >> 32);
+ PVOP_VCALL3(mmu.set_pud, pudp, val, (u64)val >> 32);
else
- PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
- val);
+ PVOP_VCALL2(mmu.set_pud, pudp, val);
}
#if CONFIG_PGTABLE_LEVELS >= 4
static inline pud_t __pud(pudval_t val)
@@ -519,11 +503,9 @@ static inline pud_t __pud(pudval_t val)
pudval_t ret;
if (sizeof(pudval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pudval_t, mmu.make_pud, val, (u64)val >> 32);
else
- ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud,
- val);
+ ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val);
return (pud_t) { ret };
}
@@ -533,11 +515,10 @@ static inline pudval_t pud_val(pud_t pud)
pudval_t ret;
if (sizeof(pudval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val,
+ ret = PVOP_CALLEE2(pudval_t, mmu.pud_val,
pud.pud, (u64)pud.pud >> 32);
else
- ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val,
- pud.pud);
+ ret = PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud);
return ret;
}
@@ -552,30 +533,28 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
p4dval_t val = native_p4d_val(p4d);
if (sizeof(p4dval_t) > sizeof(long))
- PVOP_VCALL3(pv_mmu_ops.set_p4d, p4dp,
- val, (u64)val >> 32);
+ PVOP_VCALL3(mmu.set_p4d, p4dp, val, (u64)val >> 32);
else
- PVOP_VCALL2(pv_mmu_ops.set_p4d, p4dp,
- val);
+ PVOP_VCALL2(mmu.set_p4d, p4dp, val);
}
#if CONFIG_PGTABLE_LEVELS >= 5
static inline p4d_t __p4d(p4dval_t val)
{
- p4dval_t ret = PVOP_CALLEE1(p4dval_t, pv_mmu_ops.make_p4d, val);
+ p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val);
return (p4d_t) { ret };
}
static inline p4dval_t p4d_val(p4d_t p4d)
{
- return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d);
+ return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d);
}
static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd));
+ PVOP_VCALL2(mmu.set_pgd, pgdp, native_pgd_val(pgd));
}
#define set_pgd(pgdp, pgdval) do { \
@@ -606,19 +585,18 @@ static inline void p4d_clear(p4d_t *p4dp)
64-bit pte atomically */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
- PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
- pte.pte, pte.pte >> 32);
+ PVOP_VCALL3(mmu.set_pte_atomic, ptep, pte.pte, pte.pte >> 32);
}
static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
+ PVOP_VCALL3(mmu.pte_clear, mm, addr, ptep);
}
static inline void pmd_clear(pmd_t *pmdp)
{
- PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
+ PVOP_VCALL1(mmu.pmd_clear, pmdp);
}
#else /* !CONFIG_X86_PAE */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
@@ -641,34 +619,34 @@ static inline void pmd_clear(pmd_t *pmdp)
#define __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
{
- PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
+ PVOP_VCALL1(cpu.start_context_switch, prev);
}
static inline void arch_end_context_switch(struct task_struct *next)
{
- PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
+ PVOP_VCALL1(cpu.end_context_switch, next);
}
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
- PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
+ PVOP_VCALL0(mmu.lazy_mode.enter);
}
static inline void arch_leave_lazy_mmu_mode(void)
{
- PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
+ PVOP_VCALL0(mmu.lazy_mode.leave);
}
static inline void arch_flush_lazy_mmu_mode(void)
{
- PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush);
+ PVOP_VCALL0(mmu.lazy_mode.flush);
}
static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags)
{
- pv_mmu_ops.set_fixmap(idx, phys, flags);
+ pv_ops.mmu.set_fixmap(idx, phys, flags);
}
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
@@ -676,29 +654,32 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
u32 val)
{
- PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
+ PVOP_VCALL2(lock.queued_spin_lock_slowpath, lock, val);
}
static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
{
- PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
+ PVOP_VCALLEE1(lock.queued_spin_unlock, lock);
}
static __always_inline void pv_wait(u8 *ptr, u8 val)
{
- PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
+ PVOP_VCALL2(lock.wait, ptr, val);
}
static __always_inline void pv_kick(int cpu)
{
- PVOP_VCALL1(pv_lock_ops.kick, cpu);
+ PVOP_VCALL1(lock.kick, cpu);
}
static __always_inline bool pv_vcpu_is_preempted(long cpu)
{
- return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu);
+ return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu);
}
+void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock);
+bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
+
#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32
@@ -780,22 +761,22 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu)
static inline notrace unsigned long arch_local_save_flags(void)
{
- return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
+ return PVOP_CALLEE0(unsigned long, irq.save_fl);
}
static inline notrace void arch_local_irq_restore(unsigned long f)
{
- PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
+ PVOP_VCALLEE1(irq.restore_fl, f);
}
static inline notrace void arch_local_irq_disable(void)
{
- PVOP_VCALLEE0(pv_irq_ops.irq_disable);
+ PVOP_VCALLEE0(irq.irq_disable);
}
static inline notrace void arch_local_irq_enable(void)
{
- PVOP_VCALLEE0(pv_irq_ops.irq_enable);
+ PVOP_VCALLEE0(irq.irq_enable);
}
static inline notrace unsigned long arch_local_irq_save(void)
@@ -867,7 +848,7 @@ extern void default_banner(void);
COND_POP(set, CLBR_RCX, rcx); \
COND_POP(set, CLBR_RAX, rax)
-#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
+#define PARA_PATCH(off) ((off) / 8)
#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8)
#define PARA_INDIRECT(addr) *addr(%rip)
#else
@@ -882,35 +863,35 @@ extern void default_banner(void);
COND_POP(set, CLBR_EDI, edi); \
COND_POP(set, CLBR_EAX, eax)
-#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
+#define PARA_PATCH(off) ((off) / 4)
#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4)
#define PARA_INDIRECT(addr) *%cs:addr
#endif
#define INTERRUPT_RETURN \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), \
+ PARA_SITE(PARA_PATCH(PV_CPU_iret), \
ANNOTATE_RETPOLINE_SAFE; \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
+ jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
#define DISABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), \
+ PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
+ call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define ENABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), \
+ PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
+ call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
+ call PARA_INDIRECT(pv_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
#else /* !CONFIG_X86_32 */
@@ -920,7 +901,7 @@ extern void default_banner(void);
* inlined, or the swapgs instruction must be trapped and emulated.
*/
#define SWAPGS_UNSAFE_STACK \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), swapgs)
+ PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs)
/*
* Note: swapgs is very special, and in practise is either going to be
@@ -929,26 +910,26 @@ extern void default_banner(void);
* it.
*/
#define SWAPGS \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), \
+ PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \
ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
+ call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \
)
#define GET_CR2_INTO_RAX \
ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
+ call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
#define USERGS_SYSRET64 \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
+ PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \
ANNOTATE_RETPOLINE_SAFE; \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
+ jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);)
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(clobbers) \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), \
+ PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
+ call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b2220536a7d4..462a54b2bb53 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -321,28 +321,23 @@ struct pv_lock_ops {
* number for each function using the offset which we use to indicate
* what to patch. */
struct paravirt_patch_template {
- struct pv_init_ops pv_init_ops;
- struct pv_time_ops pv_time_ops;
- struct pv_cpu_ops pv_cpu_ops;
- struct pv_irq_ops pv_irq_ops;
- struct pv_mmu_ops pv_mmu_ops;
- struct pv_lock_ops pv_lock_ops;
+ struct pv_init_ops init;
+ struct pv_time_ops time;
+ struct pv_cpu_ops cpu;
+ struct pv_irq_ops irq;
+ struct pv_mmu_ops mmu;
+ struct pv_lock_ops lock;
} __no_randomize_layout;
extern struct pv_info pv_info;
-extern struct pv_init_ops pv_init_ops;
-extern struct pv_time_ops pv_time_ops;
-extern struct pv_cpu_ops pv_cpu_ops;
-extern struct pv_irq_ops pv_irq_ops;
-extern struct pv_mmu_ops pv_mmu_ops;
-extern struct pv_lock_ops pv_lock_ops;
+extern struct paravirt_patch_template pv_ops;
#define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
#define paravirt_type(op) \
[paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
- [paravirt_opptr] "i" (&(op))
+ [paravirt_opptr] "i" (&(pv_ops.op))
#define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber)
@@ -503,9 +498,9 @@ int paravirt_disable_iospace(void);
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_PARAVIRT_DEBUG
-#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
+#define PVOP_TEST_NULL(op) BUG_ON(pv_ops.op == NULL)
#else
-#define PVOP_TEST_NULL(op) ((void)op)
+#define PVOP_TEST_NULL(op) ((void)pv_ops.op)
#endif
#define PVOP_RETMASK(rettype) \
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 7985c20601b4..85dc63b97d07 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -594,7 +594,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
memcpy(insnbuf, p->instr, p->len);
- used = pv_init_ops.patch(p->instrtype, insnbuf,
+ used = pv_ops.init.patch(p->instrtype, insnbuf,
(unsigned long)p->instr, p->len);
BUG_ON(used > p->len);
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 01de31db300d..46d14498e214 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -66,13 +66,11 @@ void common(void) {
#ifdef CONFIG_PARAVIRT
BLANK();
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
- OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
- OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+ OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable);
+ OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable);
+ OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
+ OFFSET(PV_CPU_read_cr0, paravirt_patch_template, cpu.read_cr0);
+ OFFSET(PV_MMU_read_cr2, paravirt_patch_template, mmu.read_cr2);
#endif
#ifdef CONFIG_XEN
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 3b9405e7ba2b..3384b03e717f 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -21,10 +21,11 @@ static char syscalls_ia32[] = {
int main(void)
{
#ifdef CONFIG_PARAVIRT
- OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
- OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+ OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template,
+ cpu.usergs_sysret64);
+ OFFSET(PV_CPU_swapgs, paravirt_patch_template, cpu.swapgs);
#ifdef CONFIG_DEBUG_ENTRY
- OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
+ OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl);
#endif
BLANK();
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 84dee5ab745a..1f089e1c79d3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1242,7 +1242,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
# ifdef CONFIG_PARAVIRT
do {
extern void native_iret(void);
- if (pv_cpu_ops.iret == native_iret)
+ if (pv_ops.cpu.iret == native_iret)
set_cpu_bug(c, X86_BUG_ESPFIX);
} while (0);
# else
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 8e005329648b..d9ab49bed8af 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -97,14 +97,14 @@ static void __init vmware_sched_clock_setup(void)
d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
d->cyc2ns_shift);
- pv_time_ops.sched_clock = vmware_sched_clock;
+ pv_ops.time.sched_clock = vmware_sched_clock;
pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset);
}
static void __init vmware_paravirt_ops_setup(void)
{
pv_info.name = "VMware hypervisor";
- pv_cpu_ops.io_delay = paravirt_nop;
+ pv_ops.cpu.io_delay = paravirt_nop;
if (vmware_tsc_khz && vmw_sched_clock)
vmware_sched_clock_setup();
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d9b71924c23c..ba4bfb7f6a36 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -283,7 +283,7 @@ static void __init paravirt_ops_setup(void)
pv_info.name = "KVM";
if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
- pv_cpu_ops.io_delay = kvm_io_delay;
+ pv_ops.cpu.io_delay = kvm_io_delay;
#ifdef CONFIG_X86_IO_APIC
no_timer_check = 1;
@@ -632,14 +632,14 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
has_steal_clock = 1;
- pv_time_ops.steal_clock = kvm_steal_clock;
+ pv_ops.time.steal_clock = kvm_steal_clock;
}
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
- pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
- pv_mmu_ops.tlb_remove_table = tlb_remove_table;
+ pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
+ pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -850,13 +850,14 @@ void __init kvm_spinlock_init(void)
return;
__pv_init_lock_hash();
- pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
- pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
- pv_lock_ops.wait = kvm_wait;
- pv_lock_ops.kick = kvm_kick_cpu;
+ pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_ops.lock.queued_spin_unlock =
+ PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_ops.lock.wait = kvm_wait;
+ pv_ops.lock.kick = kvm_kick_cpu;
if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
- pv_lock_ops.vcpu_is_preempted =
+ pv_ops.lock.vcpu_is_preempted =
PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
}
}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1e6764648af3..a36b93a722a2 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -116,13 +116,13 @@ static u64 kvm_sched_clock_read(void)
static inline void kvm_sched_clock_init(bool stable)
{
if (!stable) {
- pv_time_ops.sched_clock = kvm_clock_read;
+ pv_ops.time.sched_clock = kvm_clock_read;
clear_sched_clock_stable();
return;
}
kvm_sched_clock_offset = kvm_clock_read();
- pv_time_ops.sched_clock = kvm_sched_clock_read;
+ pv_ops.time.sched_clock = kvm_sched_clock_read;
pr_info("kvm-clock: using sched offset of %llu cycles",
kvm_sched_clock_offset);
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 71f2d1125ec0..4f75d0cf6305 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -17,7 +17,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
bool pv_is_native_spin_unlock(void)
{
- return pv_lock_ops.queued_spin_unlock.func ==
+ return pv_ops.lock.queued_spin_unlock.func ==
__raw_callee_save___native_queued_spin_unlock;
}
@@ -29,17 +29,6 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_vcpu_is_preempted);
bool pv_is_native_vcpu_is_preempted(void)
{
- return pv_lock_ops.vcpu_is_preempted.func ==
+ return pv_ops.lock.vcpu_is_preempted.func ==
__raw_callee_save___native_vcpu_is_preempted;
}
-
-struct pv_lock_ops pv_lock_ops = {
-#ifdef CONFIG_SMP
- .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
- .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
- .wait = paravirt_nop,
- .kick = paravirt_nop,
- .vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted),
-#endif /* SMP */
-};
-EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 287d34513f6a..5fc2b94e1591 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -128,29 +128,14 @@ void __init native_pv_lock_init(void)
static_branch_disable(&virt_spin_lock_key);
}
-/*
- * Neat trick to map patch type back to the call within the
- * corresponding structure.
- */
-static void *get_call_destination(u8 type)
-{
- struct paravirt_patch_template tmpl = {
- .pv_init_ops = pv_init_ops,
- .pv_time_ops = pv_time_ops,
- .pv_cpu_ops = pv_cpu_ops,
- .pv_irq_ops = pv_irq_ops,
- .pv_mmu_ops = pv_mmu_ops,
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
- .pv_lock_ops = pv_lock_ops,
-#endif
- };
- return *((void **)&tmpl + type);
-}
-
unsigned paravirt_patch_default(u8 type, void *insnbuf,
unsigned long addr, unsigned len)
{
- void *opfunc = get_call_destination(type);
+ /*
+ * Neat trick to map patch type back to the call within the
+ * corresponding structure.
+ */
+ void *opfunc = *((void **)&pv_ops + type);
unsigned ret;
if (opfunc == NULL)
@@ -165,8 +150,8 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf,
else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insnbuf, len);
- else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
- type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
+ else if (type == PARAVIRT_PATCH(cpu.iret) ||
+ type == PARAVIRT_PATCH(cpu.usergs_sysret64))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
else
@@ -316,77 +301,6 @@ struct pv_info pv_info = {
#endif
};
-struct pv_init_ops pv_init_ops = {
- .patch = native_patch,
-};
-
-struct pv_time_ops pv_time_ops = {
- .sched_clock = native_sched_clock,
- .steal_clock = native_steal_clock,
-};
-
-__visible struct pv_irq_ops pv_irq_ops = {
- .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
- .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
- .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
- .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
- .safe_halt = native_safe_halt,
- .halt = native_halt,
-};
-
-__visible struct pv_cpu_ops pv_cpu_ops = {
- .cpuid = native_cpuid,
- .get_debugreg = native_get_debugreg,
- .set_debugreg = native_set_debugreg,
- .read_cr0 = native_read_cr0,
- .write_cr0 = native_write_cr0,
- .write_cr4 = native_write_cr4,
-#ifdef CONFIG_X86_64
- .read_cr8 = native_read_cr8,
- .write_cr8 = native_write_cr8,
-#endif
- .wbinvd = native_wbinvd,
- .read_msr = native_read_msr,
- .write_msr = native_write_msr,
- .read_msr_safe = native_read_msr_safe,
- .write_msr_safe = native_write_msr_safe,
- .read_pmc = native_read_pmc,
- .load_tr_desc = native_load_tr_desc,
- .set_ldt = native_set_ldt,
- .load_gdt = native_load_gdt,
- .load_idt = native_load_idt,
- .store_tr = native_store_tr,
- .load_tls = native_load_tls,
-#ifdef CONFIG_X86_64
- .load_gs_index = native_load_gs_index,
-#endif
- .write_ldt_entry = native_write_ldt_entry,
- .write_gdt_entry = native_write_gdt_entry,
- .write_idt_entry = native_write_idt_entry,
-
- .alloc_ldt = paravirt_nop,
- .free_ldt = paravirt_nop,
-
- .load_sp0 = native_load_sp0,
-
-#ifdef CONFIG_X86_64
- .usergs_sysret64 = native_usergs_sysret64,
-#endif
- .iret = native_iret,
- .swapgs = native_swapgs,
-
- .set_iopl_mask = native_set_iopl_mask,
- .io_delay = native_io_delay,
-
- .start_context_switch = paravirt_nop,
- .end_context_switch = paravirt_nop,
-};
-
-/* At this point, native_get/set_debugreg has real function entries */
-NOKPROBE_SYMBOL(native_get_debugreg);
-NOKPROBE_SYMBOL(native_set_debugreg);
-NOKPROBE_SYMBOL(native_load_idt);
-
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
/* 32-bit pagetable entries */
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
@@ -395,85 +309,162 @@ NOKPROBE_SYMBOL(native_load_idt);
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif
-struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
-
- .read_cr2 = native_read_cr2,
- .write_cr2 = native_write_cr2,
- .read_cr3 = __native_read_cr3,
- .write_cr3 = native_write_cr3,
-
- .flush_tlb_user = native_flush_tlb,
- .flush_tlb_kernel = native_flush_tlb_global,
- .flush_tlb_one_user = native_flush_tlb_one_user,
- .flush_tlb_others = native_flush_tlb_others,
- .tlb_remove_table = (void (*)(struct mmu_gather *, void *))tlb_remove_page,
-
- .pgd_alloc = __paravirt_pgd_alloc,
- .pgd_free = paravirt_nop,
+struct paravirt_patch_template pv_ops = {
+ /* Init ops. */
+ .init.patch = native_patch,
+
+ /* Time ops. */
+ .time.sched_clock = native_sched_clock,
+ .time.steal_clock = native_steal_clock,
+
+ /* Cpu ops. */
+ .cpu.cpuid = native_cpuid,
+ .cpu.get_debugreg = native_get_debugreg,
+ .cpu.set_debugreg = native_set_debugreg,
+ .cpu.read_cr0 = native_read_cr0,
+ .cpu.write_cr0 = native_write_cr0,
+ .cpu.write_cr4 = native_write_cr4,
+#ifdef CONFIG_X86_64
+ .cpu.read_cr8 = native_read_cr8,
+ .cpu.write_cr8 = native_write_cr8,
+#endif
+ .cpu.wbinvd = native_wbinvd,
+ .cpu.read_msr = native_read_msr,
+ .cpu.write_msr = native_write_msr,
+ .cpu.read_msr_safe = native_read_msr_safe,
+ .cpu.write_msr_safe = native_write_msr_safe,
+ .cpu.read_pmc = native_read_pmc,
+ .cpu.load_tr_desc = native_load_tr_desc,
+ .cpu.set_ldt = native_set_ldt,
+ .cpu.load_gdt = native_load_gdt,
+ .cpu.load_idt = native_load_idt,
+ .cpu.store_tr = native_store_tr,
+ .cpu.load_tls = native_load_tls,
+#ifdef CONFIG_X86_64
+ .cpu.load_gs_index = native_load_gs_index,
+#endif
+ .cpu.write_ldt_entry = native_write_ldt_entry,
+ .cpu.write_gdt_entry = native_write_gdt_entry,
+ .cpu.write_idt_entry = native_write_idt_entry,
- .alloc_pte = paravirt_nop,
- .alloc_pmd = paravirt_nop,
- .alloc_pud = paravirt_nop,
- .alloc_p4d = paravirt_nop,
- .release_pte = paravirt_nop,
- .release_pmd = paravirt_nop,
- .release_pud = paravirt_nop,
- .release_p4d = paravirt_nop,
+ .cpu.alloc_ldt = paravirt_nop,
+ .cpu.free_ldt = paravirt_nop,
- .set_pte = native_set_pte,
- .set_pte_at = native_set_pte_at,
- .set_pmd = native_set_pmd,
+ .cpu.load_sp0 = native_load_sp0,
- .ptep_modify_prot_start = __ptep_modify_prot_start,
- .ptep_modify_prot_commit = __ptep_modify_prot_commit,
+#ifdef CONFIG_X86_64
+ .cpu.usergs_sysret64 = native_usergs_sysret64,
+#endif
+ .cpu.iret = native_iret,
+ .cpu.swapgs = native_swapgs,
+
+ .cpu.set_iopl_mask = native_set_iopl_mask,
+ .cpu.io_delay = native_io_delay,
+
+ .cpu.start_context_switch = paravirt_nop,
+ .cpu.end_context_switch = paravirt_nop,
+
+ /* Irq ops. */
+ .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
+ .irq.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
+ .irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
+ .irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
+ .irq.safe_halt = native_safe_halt,
+ .irq.halt = native_halt,
+
+ /* Mmu ops. */
+ .mmu.read_cr2 = native_read_cr2,
+ .mmu.write_cr2 = native_write_cr2,
+ .mmu.read_cr3 = __native_read_cr3,
+ .mmu.write_cr3 = native_write_cr3,
+
+ .mmu.flush_tlb_user = native_flush_tlb,
+ .mmu.flush_tlb_kernel = native_flush_tlb_global,
+ .mmu.flush_tlb_one_user = native_flush_tlb_one_user,
+ .mmu.flush_tlb_others = native_flush_tlb_others,
+ .mmu.tlb_remove_table =
+ (void (*)(struct mmu_gather *, void *))tlb_remove_page,
+
+ .mmu.pgd_alloc = __paravirt_pgd_alloc,
+ .mmu.pgd_free = paravirt_nop,
+
+ .mmu.alloc_pte = paravirt_nop,
+ .mmu.alloc_pmd = paravirt_nop,
+ .mmu.alloc_pud = paravirt_nop,
+ .mmu.alloc_p4d = paravirt_nop,
+ .mmu.release_pte = paravirt_nop,
+ .mmu.release_pmd = paravirt_nop,
+ .mmu.release_pud = paravirt_nop,
+ .mmu.release_p4d = paravirt_nop,
+
+ .mmu.set_pte = native_set_pte,
+ .mmu.set_pte_at = native_set_pte_at,
+ .mmu.set_pmd = native_set_pmd,
+
+ .mmu.ptep_modify_prot_start = __ptep_modify_prot_start,
+ .mmu.ptep_modify_prot_commit = __ptep_modify_prot_commit,
#if CONFIG_PGTABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
- .set_pte_atomic = native_set_pte_atomic,
- .pte_clear = native_pte_clear,
- .pmd_clear = native_pmd_clear,
+ .mmu.set_pte_atomic = native_set_pte_atomic,
+ .mmu.pte_clear = native_pte_clear,
+ .mmu.pmd_clear = native_pmd_clear,
#endif
- .set_pud = native_set_pud,
+ .mmu.set_pud = native_set_pud,
- .pmd_val = PTE_IDENT,
- .make_pmd = PTE_IDENT,
+ .mmu.pmd_val = PTE_IDENT,
+ .mmu.make_pmd = PTE_IDENT,
#if CONFIG_PGTABLE_LEVELS >= 4
- .pud_val = PTE_IDENT,
- .make_pud = PTE_IDENT,
+ .mmu.pud_val = PTE_IDENT,
+ .mmu.make_pud = PTE_IDENT,
- .set_p4d = native_set_p4d,
+ .mmu.set_p4d = native_set_p4d,
#if CONFIG_PGTABLE_LEVELS >= 5
- .p4d_val = PTE_IDENT,
- .make_p4d = PTE_IDENT,
+ .mmu.p4d_val = PTE_IDENT,
+ .mmu.make_p4d = PTE_IDENT,
- .set_pgd = native_set_pgd,
+ .mmu.set_pgd = native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
- .pte_val = PTE_IDENT,
- .pgd_val = PTE_IDENT,
+ .mmu.pte_val = PTE_IDENT,
+ .mmu.pgd_val = PTE_IDENT,
- .make_pte = PTE_IDENT,
- .make_pgd = PTE_IDENT,
+ .mmu.make_pte = PTE_IDENT,
+ .mmu.make_pgd = PTE_IDENT,
- .dup_mmap = paravirt_nop,
- .exit_mmap = paravirt_nop,
- .activate_mm = paravirt_nop,
+ .mmu.dup_mmap = paravirt_nop,
+ .mmu.exit_mmap = paravirt_nop,
+ .mmu.activate_mm = paravirt_nop,
- .lazy_mode = {
+ .mmu.lazy_mode = {
.enter = paravirt_nop,
.leave = paravirt_nop,
.flush = paravirt_nop,
},
- .set_fixmap = native_set_fixmap,
+ .mmu.set_fixmap = native_set_fixmap,
+
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+ /* Lock ops. */
+#ifdef CONFIG_SMP
+ .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
+ .lock.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
+ .lock.wait = paravirt_nop,
+ .lock.kick = paravirt_nop,
+ .lock.vcpu_is_preempted =
+ PV_CALLEE_SAVE(__native_vcpu_is_preempted),
+#endif /* SMP */
+#endif
};
-EXPORT_SYMBOL_GPL(pv_time_ops);
-EXPORT_SYMBOL (pv_cpu_ops);
-EXPORT_SYMBOL (pv_mmu_ops);
+/* At this point, native_get/set_debugreg has real function entries */
+NOKPROBE_SYMBOL(native_get_debugreg);
+NOKPROBE_SYMBOL(native_set_debugreg);
+NOKPROBE_SYMBOL(native_load_idt);
+
+EXPORT_SYMBOL_GPL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info);
-EXPORT_SYMBOL (pv_irq_ops);
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index e5c3a438149e..026fa43e9261 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -1,18 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/paravirt.h>
-DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
-DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
-DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
-DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
-DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
+DEF_NATIVE(irq, irq_disable, "cli");
+DEF_NATIVE(irq, irq_enable, "sti");
+DEF_NATIVE(irq, restore_fl, "push %eax; popf");
+DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
+DEF_NATIVE(cpu, iret, "iret");
+DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
#endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
@@ -41,27 +41,27 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
end = end_##ops##_##x; \
goto patch_site
switch (type) {
- PATCH_SITE(pv_irq_ops, irq_disable);
- PATCH_SITE(pv_irq_ops, irq_enable);
- PATCH_SITE(pv_irq_ops, restore_fl);
- PATCH_SITE(pv_irq_ops, save_fl);
- PATCH_SITE(pv_cpu_ops, iret);
- PATCH_SITE(pv_mmu_ops, read_cr2);
- PATCH_SITE(pv_mmu_ops, read_cr3);
- PATCH_SITE(pv_mmu_ops, write_cr3);
+ PATCH_SITE(irq, irq_disable);
+ PATCH_SITE(irq, irq_enable);
+ PATCH_SITE(irq, restore_fl);
+ PATCH_SITE(irq, save_fl);
+ PATCH_SITE(cpu, iret);
+ PATCH_SITE(mmu, read_cr2);
+ PATCH_SITE(mmu, read_cr3);
+ PATCH_SITE(mmu, write_cr3);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
- case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+ case PARAVIRT_PATCH(lock.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
- start = start_pv_lock_ops_queued_spin_unlock;
- end = end_pv_lock_ops_queued_spin_unlock;
+ start = start_lock_queued_spin_unlock;
+ end = end_lock_queued_spin_unlock;
goto patch_site;
}
goto patch_default;
- case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
+ case PARAVIRT_PATCH(lock.vcpu_is_preempted):
if (pv_is_native_vcpu_is_preempted()) {
- start = start_pv_lock_ops_vcpu_is_preempted;
- end = end_pv_lock_ops_vcpu_is_preempted;
+ start = start_lock_vcpu_is_preempted;
+ end = end_lock_vcpu_is_preempted;
goto patch_site;
}
goto patch_default;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 835f1985a115..582e893728e8 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -3,24 +3,24 @@
#include <asm/asm-offsets.h>
#include <linux/stringify.h>
-DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
-DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
-DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
-DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
-DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
-DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
+DEF_NATIVE(irq, irq_disable, "cli");
+DEF_NATIVE(irq, irq_enable, "sti");
+DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
+DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
+DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
+DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
+DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
+DEF_NATIVE(cpu, wbinvd, "wbinvd");
-DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
+DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
+DEF_NATIVE(cpu, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax");
DEF_NATIVE(, mov64, "mov %rdi, %rax");
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
#endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
@@ -49,29 +49,29 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
end = end_##ops##_##x; \
goto patch_site
switch(type) {
- PATCH_SITE(pv_irq_ops, restore_fl);
- PATCH_SITE(pv_irq_ops, save_fl);
- PATCH_SITE(pv_irq_ops, irq_enable);
- PATCH_SITE(pv_irq_ops, irq_disable);
- PATCH_SITE(pv_cpu_ops, usergs_sysret64);
- PATCH_SITE(pv_cpu_ops, swapgs);
- PATCH_SITE(pv_mmu_ops, read_cr2);
- PATCH_SITE(pv_mmu_ops, read_cr3);
- PATCH_SITE(pv_mmu_ops, write_cr3);
- PATCH_SITE(pv_cpu_ops, wbinvd);
+ PATCH_SITE(irq, restore_fl);
+ PATCH_SITE(irq, save_fl);
+ PATCH_SITE(irq, irq_enable);
+ PATCH_SITE(irq, irq_disable);
+ PATCH_SITE(cpu, usergs_sysret64);
+ PATCH_SITE(cpu, swapgs);
+ PATCH_SITE(mmu, read_cr2);
+ PATCH_SITE(mmu, read_cr3);
+ PATCH_SITE(mmu, write_cr3);
+ PATCH_SITE(cpu, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
- case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+ case PARAVIRT_PATCH(lock.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
- start = start_pv_lock_ops_queued_spin_unlock;
- end = end_pv_lock_ops_queued_spin_unlock;
+ start = start_lock_queued_spin_unlock;
+ end = end_lock_queued_spin_unlock;
goto patch_site;
}
goto patch_default;
- case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
+ case PARAVIRT_PATCH(lock.vcpu_is_preempted):
if (pv_is_native_vcpu_is_preempted()) {
- start = start_pv_lock_ops_vcpu_is_preempted;
- end = end_pv_lock_ops_vcpu_is_preempted;
+ start = start_lock_vcpu_is_preempted;
+ end = end_lock_vcpu_is_preempted;
goto patch_site;
}
goto patch_default;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1463468ba9a0..9044aa5e2389 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -246,7 +246,7 @@ unsigned long long sched_clock(void)
bool using_native_sched_clock(void)
{
- return pv_time_ops.sched_clock == native_sched_clock;
+ return pv_ops.time.sched_clock == native_sched_clock;
}
#else
unsigned long long
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index f194e5e1e95c..789918d78697 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -73,10 +73,10 @@ static unsigned __init vsmp_patch(u8 type, void *ibuf,
unsigned long addr, unsigned len)
{
switch (type) {
- case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
- case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
- case PARAVIRT_PATCH(pv_irq_ops.save_fl):
- case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
+ case PARAVIRT_PATCH(irq.irq_enable):
+ case PARAVIRT_PATCH(irq.irq_disable):
+ case PARAVIRT_PATCH(irq.save_fl):
+ case PARAVIRT_PATCH(irq.restore_fl):
return paravirt_patch_default(type, ibuf, addr, len);
default:
return native_patch(type, ibuf, addr, len);
@@ -111,11 +111,11 @@ static void __init set_vsmp_pv_ops(void)
if (cap & ctl & (1 << 4)) {
/* Setup irq ops and turn on vSMP IRQ fastpath handling */
- pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
- pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
- pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
- pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
- pv_init_ops.patch = vsmp_patch;
+ pv_ops.irq.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
+ pv_ops.irq.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
+ pv_ops.irq.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
+ pv_ops.irq.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
+ pv_ops.init.patch = vsmp_patch;
ctl &= ~(1 << 4);
}
writel(ctl, address + 4);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 52a7c3faee0c..ec7a4209f310 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -995,11 +995,14 @@ void __init xen_setup_vcpu_info_placement(void)
* percpu area for all cpus, so make use of it.
*/
if (xen_have_vcpu_info_placement) {
- pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
- pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
- pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
- pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
- pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
+ pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+ pv_ops.irq.restore_fl =
+ __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
+ pv_ops.irq.irq_disable =
+ __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+ pv_ops.irq.irq_enable =
+ __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
+ pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
}
}
@@ -1174,14 +1177,14 @@ static void __init xen_boot_params_init_edd(void)
*/
static void __init xen_setup_gdt(int cpu)
{
- pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
- pv_cpu_ops.load_gdt = xen_load_gdt_boot;
+ pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
+ pv_ops.cpu.load_gdt = xen_load_gdt_boot;
setup_stack_canary_segment(cpu);
switch_to_new_gdt(cpu);
- pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
- pv_cpu_ops.load_gdt = xen_load_gdt;
+ pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
+ pv_ops.cpu.load_gdt = xen_load_gdt;
}
static void __init xen_dom0_set_legacy_features(void)
@@ -1206,8 +1209,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
- pv_init_ops.patch = paravirt_patch_default;
- pv_cpu_ops = xen_cpu_ops;
+ pv_ops.init.patch = paravirt_patch_default;
+ pv_ops.cpu = xen_cpu_ops;
xen_init_irq_ops();
/*
@@ -1276,8 +1279,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
- pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
- pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
+ pv_ops.mmu.ptep_modify_prot_start =
+ xen_ptep_modify_prot_start;
+ pv_ops.mmu.ptep_modify_prot_commit =
+ xen_ptep_modify_prot_commit;
}
machine_ops = xen_machine_ops;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 7515a19fd324..850c93f346c7 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -128,6 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
void __init xen_init_irq_ops(void)
{
- pv_irq_ops = xen_irq_ops;
+ pv_ops.irq = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}
diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c
index dd2ad82eee80..57409373750f 100644
--- a/arch/x86/xen/mmu_hvm.c
+++ b/arch/x86/xen/mmu_hvm.c
@@ -73,7 +73,7 @@ static int is_pagetable_dying_supported(void)
void __init xen_hvm_init_mmu_ops(void)
{
if (is_pagetable_dying_supported())
- pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
+ pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE
WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
#endif
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 1e9098f53967..12180e7283b7 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2216,7 +2216,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
set_page_prot(initial_page_table, PAGE_KERNEL);
set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
- pv_mmu_ops.write_cr3 = &xen_write_cr3;
+ pv_ops.mmu.write_cr3 = &xen_write_cr3;
}
/*
@@ -2365,27 +2365,27 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
static void __init xen_post_allocator_init(void)
{
- pv_mmu_ops.set_pte = xen_set_pte;
- pv_mmu_ops.set_pmd = xen_set_pmd;
- pv_mmu_ops.set_pud = xen_set_pud;
+ pv_ops.mmu.set_pte = xen_set_pte;
+ pv_ops.mmu.set_pmd = xen_set_pmd;
+ pv_ops.mmu.set_pud = xen_set_pud;
#ifdef CONFIG_X86_64
- pv_mmu_ops.set_p4d = xen_set_p4d;
+ pv_ops.mmu.set_p4d = xen_set_p4d;
#endif
/* This will work as long as patching hasn't happened yet
(which it hasn't) */
- pv_mmu_ops.alloc_pte = xen_alloc_pte;
- pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
- pv_mmu_ops.release_pte = xen_release_pte;
- pv_mmu_ops.release_pmd = xen_release_pmd;
+ pv_ops.mmu.alloc_pte = xen_alloc_pte;
+ pv_ops.mmu.alloc_pmd = xen_alloc_pmd;
+ pv_ops.mmu.release_pte = xen_release_pte;
+ pv_ops.mmu.release_pmd = xen_release_pmd;
#ifdef CONFIG_X86_64
- pv_mmu_ops.alloc_pud = xen_alloc_pud;
- pv_mmu_ops.release_pud = xen_release_pud;
+ pv_ops.mmu.alloc_pud = xen_alloc_pud;
+ pv_ops.mmu.release_pud = xen_release_pud;
#endif
- pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte);
+ pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte);
#ifdef CONFIG_X86_64
- pv_mmu_ops.write_cr3 = &xen_write_cr3;
+ pv_ops.mmu.write_cr3 = &xen_write_cr3;
#endif
}
@@ -2473,7 +2473,7 @@ void __init xen_init_mmu_ops(void)
x86_init.paging.pagetable_init = xen_pagetable_init;
x86_init.hyper.init_after_bootmem = xen_after_bootmem;
- pv_mmu_ops = xen_mmu_ops;
+ pv_ops.mmu = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
}
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 973f10e05211..23f6793af88a 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -141,11 +141,12 @@ void __init xen_init_spinlocks(void)
printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
__pv_init_lock_hash();
- pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
- pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
- pv_lock_ops.wait = xen_qlock_wait;
- pv_lock_ops.kick = xen_qlock_kick;
- pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen);
+ pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_ops.lock.queued_spin_unlock =
+ PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_ops.lock.wait = xen_qlock_wait;
+ pv_ops.lock.kick = xen_qlock_kick;
+ pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen);
}
static __init int xen_parse_nopvspin(char *arg)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c84f1e039d84..72bf446c3fee 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -513,7 +513,7 @@ static void __init xen_time_init(void)
void __init xen_init_time_ops(void)
{
xen_sched_clock_offset = xen_clocksource_read();
- pv_time_ops = xen_time_ops;
+ pv_ops.time = xen_time_ops;
x86_init.timers.timer_init = xen_time_init;
x86_init.timers.setup_percpu_clockev = x86_init_noop;
@@ -555,7 +555,7 @@ void __init xen_hvm_init_time_ops(void)
}
xen_sched_clock_offset = xen_clocksource_read();
- pv_time_ops = xen_time_ops;
+ pv_ops.time = xen_time_ops;
x86_init.timers.setup_percpu_clockev = xen_time_init;
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 3e741cd1409c..0968859c29d0 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -175,7 +175,7 @@ void __init xen_time_setup_guest(void)
xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_runstate_update_flag);
- pv_time_ops.steal_clock = xen_steal_clock;
+ pv_ops.time.steal_clock = xen_steal_clock;
static_key_slow_inc(&paravirt_steal_enabled);
if (xen_runstate_remote)
--
2.16.4
^ permalink raw reply related
* [PATCH v3 07/15] x86/paravirt: remove clobbers from struct paravirt_patch_site
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
There is no longer any need to store the clobbers in struct
paravirt_patch_site. Remove clobbers from the struct and from the
related macros.
While at it, fix some lines longer than 80 characters.
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/include/asm/paravirt.h | 33 +++++++++++++++------------------
arch/x86/include/asm/paravirt_types.h | 1 -
2 files changed, 15 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index e375d4266b53..e1364cb40ce5 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -827,7 +827,7 @@ extern void default_banner(void);
#else /* __ASSEMBLY__ */
-#define _PVSITE(ptype, clobbers, ops, word, algn) \
+#define _PVSITE(ptype, ops, word, algn) \
771:; \
ops; \
772:; \
@@ -836,7 +836,6 @@ extern void default_banner(void);
word 771b; \
.byte ptype; \
.byte 772b-771b; \
- .short clobbers; \
.popsection
@@ -869,7 +868,7 @@ extern void default_banner(void);
COND_POP(set, CLBR_RAX, rax)
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
-#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
+#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8)
#define PARA_INDIRECT(addr) *addr(%rip)
#else
#define PV_SAVE_REGS(set) \
@@ -884,26 +883,26 @@ extern void default_banner(void);
COND_POP(set, CLBR_EAX, eax)
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
-#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
+#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4)
#define PARA_INDIRECT(addr) *%cs:addr
#endif
#define INTERRUPT_RETURN \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
- ANNOTATE_RETPOLINE_SAFE; \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), \
+ ANNOTATE_RETPOLINE_SAFE; \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
#define DISABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
+ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define ENABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
+ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
@@ -921,8 +920,7 @@ extern void default_banner(void);
* inlined, or the swapgs instruction must be trapped and emulated.
*/
#define SWAPGS_UNSAFE_STACK \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
- swapgs)
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), swapgs)
/*
* Note: swapgs is very special, and in practise is either going to be
@@ -931,8 +929,8 @@ extern void default_banner(void);
* it.
*/
#define SWAPGS \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
- ANNOTATE_RETPOLINE_SAFE; \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
)
@@ -942,15 +940,14 @@ extern void default_banner(void);
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
- CLBR_NONE, \
- ANNOTATE_RETPOLINE_SAFE; \
+ ANNOTATE_RETPOLINE_SAFE; \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(clobbers) \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 225d871373ed..b2220536a7d4 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -681,7 +681,6 @@ struct paravirt_patch_site {
u8 *instr; /* original instructions */
u8 instrtype; /* type of this instruction */
u8 len; /* length of original instruction */
- u16 clobbers; /* what registers you may clobber */
};
extern struct paravirt_patch_site __parainstructions[],
--
2.16.4
^ permalink raw reply related
* [PATCH v3 06/15] x86/paravirt: remove clobbers parameter from paravirt patch functions
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
The clobbers parameter of paravirt_patch_default() et al. isn't used
any longer. Remove it.
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/include/asm/paravirt_types.h | 7 +++----
arch/x86/kernel/alternative.c | 2 +-
arch/x86/kernel/paravirt.c | 14 +++++---------
arch/x86/kernel/paravirt_patch_32.c | 5 ++---
arch/x86/kernel/paravirt_patch_64.c | 5 ++---
arch/x86/kernel/vsmp_64.c | 6 +++---
6 files changed, 16 insertions(+), 23 deletions(-)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7d13197d760b..225d871373ed 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -85,7 +85,7 @@ struct pv_init_ops {
* the number of bytes of code generated, as we nop pad the
* rest in generic code.
*/
- unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
+ unsigned (*patch)(u8 type, void *insnbuf,
unsigned long addr, unsigned len);
} __no_randomize_layout;
@@ -373,14 +373,13 @@ extern struct pv_lock_ops pv_lock_ops;
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
+unsigned paravirt_patch_default(u8 type, void *insnbuf,
unsigned long addr, unsigned len);
unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
const char *start, const char *end);
-unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
- unsigned long addr, unsigned len);
+unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
int paravirt_disable_iospace(void);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 014f214da581..7985c20601b4 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -594,7 +594,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
memcpy(insnbuf, p->instr, p->len);
- used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
+ used = pv_init_ops.patch(p->instrtype, insnbuf,
(unsigned long)p->instr, p->len);
BUG_ON(used > p->len);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1abdbde35049..287d34513f6a 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -81,10 +81,8 @@ struct branch {
u32 delta;
} __attribute__((packed));
-static unsigned paravirt_patch_call(void *insnbuf,
- const void *target, u16 tgt_clobbers,
- unsigned long addr, u16 site_clobbers,
- unsigned len)
+static unsigned paravirt_patch_call(void *insnbuf, const void *target,
+ unsigned long addr, unsigned len)
{
struct branch *b = insnbuf;
unsigned long delta = (unsigned long)target - (addr+5);
@@ -149,7 +147,7 @@ static void *get_call_destination(u8 type)
return *((void **)&tmpl + type);
}
-unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
+unsigned paravirt_patch_default(u8 type, void *insnbuf,
unsigned long addr, unsigned len)
{
void *opfunc = get_call_destination(type);
@@ -172,10 +170,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
else
- /* Otherwise call the function; assume target could
- clobber any caller-save reg */
- ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY,
- addr, clobbers, len);
+ /* Otherwise call the function. */
+ ret = paravirt_patch_call(insnbuf, opfunc, addr, len);
return ret;
}
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 758e69d72ebf..e5c3a438149e 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -30,8 +30,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
extern bool pv_is_native_spin_unlock(void);
extern bool pv_is_native_vcpu_is_preempted(void);
-unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
- unsigned long addr, unsigned len)
+unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
{
const unsigned char *start, *end;
unsigned ret;
@@ -70,7 +69,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
default:
patch_default: __maybe_unused
- ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
+ ret = paravirt_patch_default(type, ibuf, addr, len);
break;
patch_site:
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 9cb98f7b07c9..835f1985a115 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -38,8 +38,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
extern bool pv_is_native_spin_unlock(void);
extern bool pv_is_native_vcpu_is_preempted(void);
-unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
- unsigned long addr, unsigned len)
+unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
{
const unsigned char *start, *end;
unsigned ret;
@@ -80,7 +79,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
default:
patch_default: __maybe_unused
- ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
+ ret = paravirt_patch_default(type, ibuf, addr, len);
break;
patch_site:
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 44685fb2a192..f194e5e1e95c 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -69,7 +69,7 @@ asmlinkage __visible void vsmp_irq_enable(void)
}
PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
-static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf,
+static unsigned __init vsmp_patch(u8 type, void *ibuf,
unsigned long addr, unsigned len)
{
switch (type) {
@@ -77,9 +77,9 @@ static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf,
case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
case PARAVIRT_PATCH(pv_irq_ops.save_fl):
case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
- return paravirt_patch_default(type, clobbers, ibuf, addr, len);
+ return paravirt_patch_default(type, ibuf, addr, len);
default:
- return native_patch(type, clobbers, ibuf, addr, len);
+ return native_patch(type, ibuf, addr, len);
}
}
--
2.16.4
^ permalink raw reply related
* [PATCH v3 05/15] x86/paravirt: make paravirt_patch_call() and paravirt_patch_jmp() static
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
paravirt_patch_call() and paravirt_patch_jmp() are used in paravirt.c
only. Convert them to static.
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/include/asm/paravirt_types.h | 6 ------
arch/x86/kernel/paravirt.c | 12 ++++++------
2 files changed, 6 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 4b75acc23b30..7d13197d760b 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -373,12 +373,6 @@ extern struct pv_lock_ops pv_lock_ops;
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_call(void *insnbuf,
- const void *target, u16 tgt_clobbers,
- unsigned long addr, u16 site_clobbers,
- unsigned len);
-unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
- unsigned long addr, unsigned len);
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
unsigned long addr, unsigned len);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index afdb303285f8..1abdbde35049 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -81,10 +81,10 @@ struct branch {
u32 delta;
} __attribute__((packed));
-unsigned paravirt_patch_call(void *insnbuf,
- const void *target, u16 tgt_clobbers,
- unsigned long addr, u16 site_clobbers,
- unsigned len)
+static unsigned paravirt_patch_call(void *insnbuf,
+ const void *target, u16 tgt_clobbers,
+ unsigned long addr, u16 site_clobbers,
+ unsigned len)
{
struct branch *b = insnbuf;
unsigned long delta = (unsigned long)target - (addr+5);
@@ -103,8 +103,8 @@ unsigned paravirt_patch_call(void *insnbuf,
return 5;
}
-unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
- unsigned long addr, unsigned len)
+static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
+ unsigned long addr, unsigned len)
{
struct branch *b = insnbuf;
unsigned long delta = (unsigned long)target - (addr+5);
--
2.16.4
^ permalink raw reply related
* [PATCH v3 04/15] xen: add SPDX identifier in arch/x86/xen files
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
arch/x86/xen/efi.c | 14 +-------------
arch/x86/xen/enlighten.c | 2 ++
arch/x86/xen/enlighten_hvm.c | 2 ++
arch/x86/xen/grant-table.c | 25 +------------------------
arch/x86/xen/mmu.c | 2 ++
arch/x86/xen/mmu_pv.c | 2 ++
arch/x86/xen/p2m.c | 2 ++
arch/x86/xen/pci-swiotlb-xen.c | 2 ++
arch/x86/xen/platform-pci-unplug.c | 16 ++--------------
arch/x86/xen/vdso.h | 2 ++
arch/x86/xen/xen-pvh.S | 15 ++-------------
11 files changed, 20 insertions(+), 64 deletions(-)
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 1804b27f9632..1fbb629a9d78 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2014 Oracle Co., Daniel Kiper
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/bitops.h>
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2eeddd814653..749fb4b73eda 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
#include <linux/bootmem.h>
#endif
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 19c1ff542387..0e75642d42a3 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/kexec.h>
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 92ccc718152d..ecb0d5450334 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/******************************************************************************
* grant_table.c
* x86 specific part
@@ -8,30 +9,6 @@
* Copyright (c) 2004-2005, K A Fraser
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan. Split out x86 specific part.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
*/
#include <linux/sched.h>
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e0e13fe16d37..60e9c37fd79f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
#include <linux/pfn.h>
#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index bdfac00b1ec8..1e9098f53967 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
/*
* Xen mmu operations
*
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 159a897151d6..d6d74efd8912 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
/*
* Xen leaves the responsibility for maintaining p2m mappings to the
* guests themselves, but it must also access and update the p2m array
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 37c6056a7bba..33293ce01d8d 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
/* Glue code to lib/swiotlb-xen.c */
#include <linux/dma-mapping.h>
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 3957946a6cfe..2e794ac9d8e8 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
/******************************************************************************
* platform-pci-unplug.c
*
* Xen platform PCI device driver
* Copyright (c) 2010, Citrix
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
*/
#include <linux/init.h>
diff --git a/arch/x86/xen/vdso.h b/arch/x86/xen/vdso.h
index 861fedfe5230..873c54c488fe 100644
--- a/arch/x86/xen/vdso.h
+++ b/arch/x86/xen/vdso.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
/* Bit used for the pseudo-hwcap for non-negative segments. We use
bit 1 to avoid bugs in some versions of glibc when bit 0 is
used; the choice is otherwise arbitrary. */
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
index ca2d3b2bf2af..b0e471506cd8 100644
--- a/arch/x86/xen/xen-pvh.S
+++ b/arch/x86/xen/xen-pvh.S
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
/*
* Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
*/
.code32
--
2.16.4
^ permalink raw reply related
* [PATCH v3 03/15] xen: link platform-pci-unplug.o only if CONFIG_XEN_PVHVM
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
Instead of wrapping all of arch/x86/xen/platform-pci-unplug.c in one
large #ifdef CONFIG_XEN_PVHVM, build the object file only when
CONFIG_XEN_PVHVM is set.
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
arch/x86/xen/Makefile | 2 +-
arch/x86/xen/platform-pci-unplug.c | 2 --
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index a964f307a266..dd2550d33b38 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -19,11 +19,11 @@ obj-y += mmu.o
obj-y += time.o
obj-y += grant-table.o
obj-y += suspend.o
-obj-y += platform-pci-unplug.o
obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o
obj-$(CONFIG_XEN_PVHVM) += mmu_hvm.o
obj-$(CONFIG_XEN_PVHVM) += suspend_hvm.o
+obj-$(CONFIG_XEN_PVHVM) += platform-pci-unplug.o
obj-$(CONFIG_XEN_PV) += setup.o
obj-$(CONFIG_XEN_PV) += apic.o
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 33a783c77d96..3957946a6cfe 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -30,7 +30,6 @@
#define XEN_PLATFORM_ERR_PROTOCOL -2
#define XEN_PLATFORM_ERR_BLACKLIST -3
-#ifdef CONFIG_XEN_PVHVM
/* store the value of xen_emul_unplug after the unplug is done */
static int xen_platform_pci_unplug;
static int xen_emul_unplug;
@@ -214,4 +213,3 @@ static int __init parse_xen_emul_unplug(char *arg)
return 0;
}
early_param("xen_emul_unplug", parse_xen_emul_unplug);
-#endif
--
2.16.4
^ permalink raw reply related
* [PATCH v3 02/15] xen: move pv specific parts of arch/x86/xen/mmu.c to mmu_pv.c
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
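There are some PV-specific functions in arch/x86/xen/mmu.c which can
be moved to mmu_pv.c. This in turn enables us to make multicalls.c
dependent on CONFIG_XEN_PV.
The xen_remap_domain_*() functions become inline wrappers in
include/xen/xen-ops.h around the new xen_remap_pfn().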
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
V3:
- fix build failure on ARM (Boris Ostrovsky)
---
arch/arm/xen/enlighten.c | 34 --------
arch/x86/xen/Makefile | 2 +-
arch/x86/xen/mmu.c | 186 -----------------------------------------
arch/x86/xen/mmu_pv.c | 138 ++++++++++++++++++++++++++++++
include/xen/interface/memory.h | 6 --
include/xen/xen-ops.h | 133 +++++++++++++++++++----------
6 files changed, 227 insertions(+), 272 deletions(-)
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 07060e5b5864..17e478928276 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -62,29 +62,6 @@ static __read_mostly unsigned int xen_events_irq;
uint32_t xen_start_flags;
EXPORT_SYMBOL(xen_start_flags);
-int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *gfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned domid,
- struct page **pages)
-{
- return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
- prot, domid, pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
-
-/* Not used by XENFEAT_auto_translated guests. */
-int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t gfn, int nr,
- pgprot_t prot, unsigned domid,
- struct page **pages)
-{
- return -ENOSYS;
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
-
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages)
{
@@ -92,17 +69,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
-/* Not used by XENFEAT_auto_translated guests. */
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *mfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned int domid, struct page **pages)
-{
- return -ENOSYS;
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
-
static void xen_read_wallclock(struct timespec64 *ts)
{
u32 version;
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index f723b5aa8f74..a964f307a266 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,7 +15,6 @@ CFLAGS_enlighten_pv.o := $(nostackp)
CFLAGS_mmu_pv.o := $(nostackp)
obj-y += enlighten.o
-obj-y += multicalls.o
obj-y += mmu.o
obj-y += time.o
obj-y += grant-table.o
@@ -34,6 +33,7 @@ obj-$(CONFIG_XEN_PV) += p2m.o
obj-$(CONFIG_XEN_PV) += enlighten_pv.o
obj-$(CONFIG_XEN_PV) += mmu_pv.o
obj-$(CONFIG_XEN_PV) += irq.o
+obj-$(CONFIG_XEN_PV) += multicalls.o
obj-$(CONFIG_XEN_PV) += xen-asm.o
obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 96fc2f0fdbfe..e0e13fe16d37 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -6,12 +6,6 @@
#include "multicalls.h"
#include "mmu.h"
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and balloon lists.
- */
-DEFINE_SPINLOCK(xen_reservation_lock);
-
unsigned long arbitrary_virt_to_mfn(void *vaddr)
{
xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
@@ -42,186 +36,6 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
}
EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
-static noinline void xen_flush_tlb_all(void)
-{
- struct mmuext_op *op;
- struct multicall_space mcs;
-
- preempt_disable();
-
- mcs = xen_mc_entry(sizeof(*op));
-
- op = mcs.args;
- op->cmd = MMUEXT_TLB_FLUSH_ALL;
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-
- preempt_enable();
-}
-
-#define REMAP_BATCH_SIZE 16
-
-struct remap_data {
- xen_pfn_t *pfn;
- bool contiguous;
- bool no_translate;
- pgprot_t prot;
- struct mmu_update *mmu_update;
-};
-
-static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
- unsigned long addr, void *data)
-{
- struct remap_data *rmd = data;
- pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
-
- /*
- * If we have a contiguous range, just update the pfn itself,
- * else update pointer to be "next pfn".
- */
- if (rmd->contiguous)
- (*rmd->pfn)++;
- else
- rmd->pfn++;
-
- rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
- rmd->mmu_update->ptr |= rmd->no_translate ?
- MMU_PT_UPDATE_NO_TRANSLATE :
- MMU_NORMAL_PT_UPDATE;
- rmd->mmu_update->val = pte_val_ma(pte);
- rmd->mmu_update++;
-
- return 0;
-}
-
-static int do_remap_pfn(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *pfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned int domid,
- bool no_translate,
- struct page **pages)
-{
- int err = 0;
- struct remap_data rmd;
- struct mmu_update mmu_update[REMAP_BATCH_SIZE];
- unsigned long range;
- int mapped = 0;
-
- BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
-
- rmd.pfn = pfn;
- rmd.prot = prot;
- /*
- * We use the err_ptr to indicate if there we are doing a contiguous
- * mapping or a discontigious mapping.
- */
- rmd.contiguous = !err_ptr;
- rmd.no_translate = no_translate;
-
- while (nr) {
- int index = 0;
- int done = 0;
- int batch = min(REMAP_BATCH_SIZE, nr);
- int batch_left = batch;
- range = (unsigned long)batch << PAGE_SHIFT;
-
- rmd.mmu_update = mmu_update;
- err = apply_to_page_range(vma->vm_mm, addr, range,
- remap_area_pfn_pte_fn, &rmd);
- if (err)
- goto out;
-
- /* We record the error for each page that gives an error, but
- * continue mapping until the whole set is done */
- do {
- int i;
-
- err = HYPERVISOR_mmu_update(&mmu_update[index],
- batch_left, &done, domid);
-
- /*
- * @err_ptr may be the same buffer as @gfn, so
- * only clear it after each chunk of @gfn is
- * used.
- */
- if (err_ptr) {
- for (i = index; i < index + done; i++)
- err_ptr[i] = 0;
- }
- if (err < 0) {
- if (!err_ptr)
- goto out;
- err_ptr[i] = err;
- done++; /* Skip failed frame. */
- } else
- mapped += done;
- batch_left -= done;
- index += done;
- } while (batch_left);
-
- nr -= batch;
- addr += range;
- if (err_ptr)
- err_ptr += batch;
- cond_resched();
- }
-out:
-
- xen_flush_tlb_all();
-
- return err < 0 ? err : mapped;
-}
-
-int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t gfn, int nr,
- pgprot_t prot, unsigned domid,
- struct page **pages)
-{
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return -EOPNOTSUPP;
-
- return do_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false,
- pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
-
-int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *gfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned domid, struct page **pages)
-{
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
- prot, domid, pages);
-
- /* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
- * and the consequences later is quite hard to detect what the actual
- * cause of "wrong memory was mapped in".
- */
- BUG_ON(err_ptr == NULL);
- return do_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid,
- false, pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
-
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *mfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned int domid, struct page **pages)
-{
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return -EOPNOTSUPP;
-
- return do_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid,
- true, pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
-
/* Returns: 0 success */
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 45b700ac5fe7..bdfac00b1ec8 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -98,6 +98,12 @@ static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
#endif /* CONFIG_X86_64 */
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and balloon lists.
+ */
+DEFINE_SPINLOCK(xen_reservation_lock);
+
/*
* Note about cr3 (pagetable base) values:
*
@@ -2665,6 +2671,138 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
+static noinline void xen_flush_tlb_all(void)
+{
+ struct mmuext_op *op;
+ struct multicall_space mcs;
+
+ preempt_disable();
+
+ mcs = xen_mc_entry(sizeof(*op));
+
+ op = mcs.args;
+ op->cmd = MMUEXT_TLB_FLUSH_ALL;
+ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ preempt_enable();
+}
+
+#define REMAP_BATCH_SIZE 16
+
+struct remap_data {
+ xen_pfn_t *pfn;
+ bool contiguous;
+ bool no_translate;
+ pgprot_t prot;
+ struct mmu_update *mmu_update;
+};
+
+static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
+ unsigned long addr, void *data)
+{
+ struct remap_data *rmd = data;
+ pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
+
+ /*
+ * If we have a contiguous range, just update the pfn itself,
+ * else update pointer to be "next pfn".
+ */
+ if (rmd->contiguous)
+ (*rmd->pfn)++;
+ else
+ rmd->pfn++;
+
+ rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
+ rmd->mmu_update->ptr |= rmd->no_translate ?
+ MMU_PT_UPDATE_NO_TRANSLATE :
+ MMU_NORMAL_PT_UPDATE;
+ rmd->mmu_update->val = pte_val_ma(pte);
+ rmd->mmu_update++;
+
+ return 0;
+}
+
+int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+ xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
+ unsigned int domid, bool no_translate, struct page **pages)
+{
+ int err = 0;
+ struct remap_data rmd;
+ struct mmu_update mmu_update[REMAP_BATCH_SIZE];
+ unsigned long range;
+ int mapped = 0;
+
+ BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
+
+ rmd.pfn = pfn;
+ rmd.prot = prot;
+ /*
+ * We use the err_ptr to indicate if there we are doing a contiguous
+ * mapping or a discontigious mapping.
+ */
+ rmd.contiguous = !err_ptr;
+ rmd.no_translate = no_translate;
+
+ while (nr) {
+ int index = 0;
+ int done = 0;
+ int batch = min(REMAP_BATCH_SIZE, nr);
+ int batch_left = batch;
+
+ range = (unsigned long)batch << PAGE_SHIFT;
+
+ rmd.mmu_update = mmu_update;
+ err = apply_to_page_range(vma->vm_mm, addr, range,
+ remap_area_pfn_pte_fn, &rmd);
+ if (err)
+ goto out;
+
+ /*
+ * We record the error for each page that gives an error, but
+ * continue mapping until the whole set is done
+ */
+ do {
+ int i;
+
+ err = HYPERVISOR_mmu_update(&mmu_update[index],
+ batch_left, &done, domid);
+
+ /*
+ * @err_ptr may be the same buffer as @gfn, so
+ * only clear it after each chunk of @gfn is
+ * used.
+ */
+ if (err_ptr) {
+ for (i = index; i < index + done; i++)
+ err_ptr[i] = 0;
+ }
+ if (err < 0) {
+ if (!err_ptr)
+ goto out;
+ err_ptr[i] = err;
+ done++; /* Skip failed frame. */
+ } else
+ mapped += done;
+ batch_left -= done;
+ index += done;
+ } while (batch_left);
+
+ nr -= batch;
+ addr += range;
+ if (err_ptr)
+ err_ptr += batch;
+ cond_resched();
+ }
+out:
+
+ xen_flush_tlb_all();
+
+ return err < 0 ? err : mapped;
+}
+EXPORT_SYMBOL_GPL(xen_remap_pfn);
+
#ifdef CONFIG_KEXEC_CORE
phys_addr_t paddr_vmcoreinfo_note(void)
{
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index 4c5751c26f87..447004861f00 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -244,12 +244,6 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
#define XENMEM_machine_memory_map 10
-/*
- * Prevent the balloon driver from changing the memory reservation
- * during a driver critical region.
- */
-extern spinlock_t xen_reservation_lock;
-
/*
* Unmaps the page appearing at a particular GPFN from the specified guest's
* pseudophysical address space.
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index fd18c974a619..18803ff76e27 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,6 +5,7 @@
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/efi.h>
+#include <xen/features.h>
#include <asm/xen/interface.h>
#include <xen/interface/vcpu.h>
@@ -47,6 +48,10 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
dma_addr_t *dma_handle);
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
+
+int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+ xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
+ unsigned int domid, bool no_translate, struct page **pages);
#else
static inline int xen_create_contiguous_region(phys_addr_t pstart,
unsigned int order,
@@ -58,10 +63,50 @@ static inline int xen_create_contiguous_region(phys_addr_t pstart,
static inline void xen_destroy_contiguous_region(phys_addr_t pstart,
unsigned int order) { }
+
+static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+ xen_pfn_t *pfn, int nr, int *err_ptr,
+ pgprot_t prot, unsigned int domid,
+ bool no_translate, struct page **pages)
+{
+ BUG();
+ return 0;
+}
#endif
struct vm_area_struct;
+#ifdef CONFIG_XEN_AUTO_XLATE
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
+ unsigned long addr,
+ xen_pfn_t *gfn, int nr,
+ int *err_ptr, pgprot_t prot,
+ unsigned int domid,
+ struct page **pages);
+int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
+ int nr, struct page **pages);
+#else
+/*
+ * These two functions are called from arch/x86/xen/mmu.c and so stubs
+ * are needed for a configuration not specifying CONFIG_XEN_AUTO_XLATE.
+ */
+static inline int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
+ unsigned long addr,
+ xen_pfn_t *gfn, int nr,
+ int *err_ptr, pgprot_t prot,
+ unsigned int domid,
+ struct page **pages)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
+ int nr, struct page **pages)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
/*
* xen_remap_domain_gfn_array() - map an array of foreign frames by gfn
* @vma: VMA to map the pages into
@@ -79,12 +124,25 @@ struct vm_area_struct;
* Returns the number of successfully mapped frames, or a -ve error
* code.
*/
-int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *gfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned domid,
- struct page **pages);
+static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
+ unsigned long addr,
+ xen_pfn_t *gfn, int nr,
+ int *err_ptr, pgprot_t prot,
+ unsigned int domid,
+ struct page **pages)
+{
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
+ prot, domid, pages);
+
+ /* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
+ * and the consequences later is quite hard to detect what the actual
+ * cause of "wrong memory was mapped in".
+ */
+ BUG_ON(err_ptr == NULL);
+ return xen_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid,
+ false, pages);
+}
/*
* xen_remap_domain_mfn_array() - map an array of foreign frames by mfn
@@ -103,10 +161,18 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
* Returns the number of successfully mapped frames, or a -ve error
* code.
*/
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
- unsigned long addr, xen_pfn_t *mfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned int domid, struct page **pages);
+static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+ unsigned long addr, xen_pfn_t *mfn,
+ int nr, int *err_ptr,
+ pgprot_t prot, unsigned int domid,
+ struct page **pages)
+{
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -EOPNOTSUPP;
+
+ return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid,
+ true, pages);
+}
/* xen_remap_domain_gfn_range() - map a range of foreign frames
* @vma: VMA to map the pages into
@@ -120,44 +186,21 @@ int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
* Returns the number of successfully mapped frames, or a -ve error
* code.
*/
-int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t gfn, int nr,
- pgprot_t prot, unsigned domid,
- struct page **pages);
-int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
- int numpgs, struct page **pages);
-
-#ifdef CONFIG_XEN_AUTO_XLATE
-int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *gfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned domid,
- struct page **pages);
-int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
- int nr, struct page **pages);
-#else
-/*
- * These two functions are called from arch/x86/xen/mmu.c and so stubs
- * are needed for a configuration not specifying CONFIG_XEN_AUTO_XLATE.
- */
-static inline int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *gfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned int domid,
- struct page **pages)
+static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
+ unsigned long addr,
+ xen_pfn_t gfn, int nr,
+ pgprot_t prot, unsigned int domid,
+ struct page **pages)
{
- return -EOPNOTSUPP;
-}
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -EOPNOTSUPP;
-static inline int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
- int nr, struct page **pages)
-{
- return -EOPNOTSUPP;
+ return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false,
+ pages);
}
-#endif
+
+int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
+ int numpgs, struct page **pages);
int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr,
unsigned long nr_grant_frames);
--
2.16.4
^ permalink raw reply related
* [PATCH v3 01/15] xen: move pv irq related functions under CONFIG_XEN_PV umbrella
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>
All functions in arch/x86/xen/irq.c and arch/x86/xen/xen-asm*.S are
specific to PV guests. Build them into the kernel only if
CONFIG_XEN_PV is set.
Make the PV-specific code in arch/x86/entry/entry_*.S dependent on
CONFIG_XEN_PV instead of CONFIG_XEN.
The HVM-specific code should depend on CONFIG_XEN_PVHVM.
While at it, reformat the Makefile to make it more readable.
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_32.S | 8 +++++---
arch/x86/entry/entry_64.S | 8 +++++---
arch/x86/xen/Makefile | 41 +++++++++++++++++++++++++++++++----------
include/xen/events.h | 2 ++
4 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52c..9cc4c3064ce0 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -782,7 +782,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
* will ignore all of the single-step traps generated in this range.
*/
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
/*
* Xen doesn't set %esp to be precisely what the normal SYSENTER
* entry point expects, so fix it up before using the normal path.
@@ -1240,7 +1240,7 @@ ENTRY(spurious_interrupt_bug)
jmp common_exception
END(spurious_interrupt_bug)
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
@@ -1321,11 +1321,13 @@ ENTRY(xen_failsafe_callback)
_ASM_EXTABLE(3b, 8b)
_ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback)
+#endif /* CONFIG_XEN_PV */
+#ifdef CONFIG_XEN_PVHVM
BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
xen_evtchn_do_upcall)
+#endif
-#endif /* CONFIG_XEN */
#if IS_ENABLED(CONFIG_HYPERV)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693ecc..a9ec5d3c6e67 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1050,7 +1050,7 @@ ENTRY(do_softirq_own_stack)
ret
ENDPROC(do_softirq_own_stack)
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
/*
@@ -1130,11 +1130,13 @@ ENTRY(xen_failsafe_callback)
ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
+#endif /* CONFIG_XEN_PV */
+#ifdef CONFIG_XEN_PVHVM
apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
xen_hvm_callback_vector xen_evtchn_do_upcall
+#endif
-#endif /* CONFIG_XEN */
#if IS_ENABLED(CONFIG_HYPERV)
apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
@@ -1151,7 +1153,7 @@ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry int3 do_int3 has_error_code=0
idtentry stack_segment do_stack_segment has_error_code=1
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
idtentry xennmi do_nmi has_error_code=0
idtentry xendebug do_debug has_error_code=0
idtentry xenint3 do_int3 has_error_code=0
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index d83cb5478f54..f723b5aa8f74 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -12,25 +12,46 @@ endif
# Make sure early boot has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_enlighten_pv.o := $(nostackp)
-CFLAGS_mmu_pv.o := $(nostackp)
+CFLAGS_mmu_pv.o := $(nostackp)
-obj-y := enlighten.o multicalls.o mmu.o irq.o \
- time.o xen-asm.o xen-asm_$(BITS).o \
- grant-table.o suspend.o platform-pci-unplug.o
+obj-y += enlighten.o
+obj-y += multicalls.o
+obj-y += mmu.o
+obj-y += time.o
+obj-y += grant-table.o
+obj-y += suspend.o
+obj-y += platform-pci-unplug.o
-obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o mmu_hvm.o suspend_hvm.o
-obj-$(CONFIG_XEN_PV) += setup.o apic.o pmu.o suspend_pv.o \
- p2m.o enlighten_pv.o mmu_pv.o
-obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o
+obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o
+obj-$(CONFIG_XEN_PVHVM) += mmu_hvm.o
+obj-$(CONFIG_XEN_PVHVM) += suspend_hvm.o
-obj-$(CONFIG_EVENT_TRACING) += trace.o
+obj-$(CONFIG_XEN_PV) += setup.o
+obj-$(CONFIG_XEN_PV) += apic.o
+obj-$(CONFIG_XEN_PV) += pmu.o
+obj-$(CONFIG_XEN_PV) += suspend_pv.o
+obj-$(CONFIG_XEN_PV) += p2m.o
+obj-$(CONFIG_XEN_PV) += enlighten_pv.o
+obj-$(CONFIG_XEN_PV) += mmu_pv.o
+obj-$(CONFIG_XEN_PV) += irq.o
+obj-$(CONFIG_XEN_PV) += xen-asm.o
+obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o
+
+obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o
+obj-$(CONFIG_XEN_PVH) += xen-pvh.o
+
+obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_XEN_PV_SMP) += smp_pv.o
obj-$(CONFIG_XEN_PVHVM_SMP) += smp_hvm.o
+
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
+
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
+
obj-$(CONFIG_XEN_DOM0) += vga.o
+
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
+
obj-$(CONFIG_XEN_EFI) += efi.o
-obj-$(CONFIG_XEN_PVH) += xen-pvh.o
diff --git a/include/xen/events.h b/include/xen/events.h
index c3e6bc643a7b..a48897199975 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -89,11 +89,13 @@ unsigned irq_from_evtchn(unsigned int evtchn);
int irq_from_virq(unsigned int cpu, unsigned int virq);
unsigned int evtchn_from_irq(unsigned irq);
+#ifdef CONFIG_XEN_PVHVM
/* Xen HVM evtchn vector callback */
void xen_hvm_callback_vector(void);
#ifdef CONFIG_TRACING
#define trace_xen_hvm_callback_vector xen_hvm_callback_vector
#endif
+#endif
int xen_set_callback_via(uint64_t via);
void xen_evtchn_do_upcall(struct pt_regs *regs);
void xen_hvm_evtchn_do_upcall(void);
--
2.16.4
^ permalink raw reply related
* [PATCH v3 00/15] x86/paravirt, xen: several cleanups
From: Juergen Gross @ 2018-08-28 7:40 UTC (permalink / raw)
To: linux-kernel, xen-devel, x86, virtualization
Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
This series removes some no longer needed stuff from paravirt
infrastructure and puts large quantities of paravirt ops under a new
config option PARAVIRT_XXL which is selected by XEN_PV only.
Some Xen related cleanups:
- move some pv-only code from CONFIG_XEN to CONFIG_XEN_PV
- use CONFIG_XEN_PVHVM in Makefile instead of #ifdef around a complete source file
- add SPDX identifier where missing
A pvops kernel without XEN_PV being configured is about 2.5% smaller
with this series applied.
Changes in V3:
- merged with the Xen cleanup series which is a prerequisite for the
pv cleanup
- fix build errors on ARM
- let VSMP select PARAVIRT_XXL, put all irq_ops under PARAVIRT_XXL
Changes in V2:
- patch 4: shorten pv_ops sub-structure names (Jan Beulich)
- patch 11: new patch
Juergen Gross (15):
xen: move pv irq related functions under CONFIG_XEN_PV umbrella
xen: move pv specific parts of arch/x86/xen/mmu.c to mmu_pv.c
xen: link platform-pci-unplug.o only if CONFIG_XEN_PVHVM
xen: add SPDX identifier in arch/x86/xen files
x86/paravirt: make paravirt_patch_call() and paravirt_patch_jmp()
static
x86/paravirt: remove clobbers parameter from paravirt patch functions
x86/paravirt: remove clobbers from struct paravirt_patch_site
x86/paravirt: use a single ops structure
x86/paravirt: remove unused paravirt bits
x86/paravirt: introduce new config option PARAVIRT_XXL
x86/paravirt: move items in pv_info under PARAVIRT_XXL umbrella
x86/paravirt: move the Xen-only pv_cpu_ops under the PARAVIRT_XXL
umbrella
x86/paravirt: move the pv_irq_ops under the PARAVIRT_XXL umbrella
x86/paravirt: move the Xen-only pv_mmu_ops under the PARAVIRT_XXL
umbrella
x86/paravirt: remove unneeded mmu related paravirt ops bits
arch/arm/include/asm/paravirt.h | 9 +-
arch/arm/kernel/paravirt.c | 4 +-
arch/arm/xen/enlighten.c | 34 ---
arch/arm64/include/asm/paravirt.h | 9 +-
arch/arm64/kernel/paravirt.c | 4 +-
arch/x86/Kconfig | 4 +
arch/x86/boot/compressed/misc.h | 1 +
arch/x86/entry/entry_32.S | 8 +-
arch/x86/entry/entry_64.S | 8 +-
arch/x86/hyperv/mmu.c | 4 +-
arch/x86/include/asm/debugreg.h | 2 +-
arch/x86/include/asm/desc.h | 4 +-
arch/x86/include/asm/fixmap.h | 2 +-
arch/x86/include/asm/irqflags.h | 16 +-
arch/x86/include/asm/mmu_context.h | 4 +-
arch/x86/include/asm/msr.h | 4 +-
arch/x86/include/asm/paravirt.h | 415 +++++++++++++---------------
arch/x86/include/asm/paravirt_types.h | 82 +++---
arch/x86/include/asm/pgalloc.h | 2 +-
arch/x86/include/asm/pgtable-3level_types.h | 2 +-
arch/x86/include/asm/pgtable.h | 7 +-
arch/x86/include/asm/processor.h | 4 +-
arch/x86/include/asm/ptrace.h | 2 +-
arch/x86/include/asm/segment.h | 2 +-
arch/x86/include/asm/special_insns.h | 4 +-
arch/x86/kernel/alternative.c | 2 +-
arch/x86/kernel/asm-offsets.c | 13 +-
arch/x86/kernel/asm-offsets_64.c | 9 +-
arch/x86/kernel/cpu/common.c | 4 +-
arch/x86/kernel/cpu/vmware.c | 4 +-
arch/x86/kernel/head_64.S | 2 +-
arch/x86/kernel/kvm.c | 19 +-
arch/x86/kernel/kvmclock.c | 4 +-
arch/x86/kernel/paravirt-spinlocks.c | 15 +-
arch/x86/kernel/paravirt.c | 305 ++++++++++----------
arch/x86/kernel/paravirt_patch_32.c | 57 ++--
arch/x86/kernel/paravirt_patch_64.c | 65 ++---
arch/x86/kernel/tsc.c | 2 +-
arch/x86/kernel/vsmp_64.c | 26 +-
arch/x86/mm/mem_encrypt_identity.c | 1 +
arch/x86/xen/Kconfig | 1 +
arch/x86/xen/Makefile | 41 ++-
arch/x86/xen/efi.c | 14 +-
arch/x86/xen/enlighten.c | 2 +
arch/x86/xen/enlighten_hvm.c | 2 +
arch/x86/xen/enlighten_pv.c | 31 ++-
arch/x86/xen/grant-table.c | 25 +-
arch/x86/xen/irq.c | 2 +-
arch/x86/xen/mmu.c | 188 +------------
arch/x86/xen/mmu_hvm.c | 2 +-
arch/x86/xen/mmu_pv.c | 168 ++++++++++-
arch/x86/xen/p2m.c | 2 +
arch/x86/xen/pci-swiotlb-xen.c | 2 +
arch/x86/xen/platform-pci-unplug.c | 18 +-
arch/x86/xen/spinlock.c | 11 +-
arch/x86/xen/time.c | 4 +-
arch/x86/xen/vdso.h | 2 +
arch/x86/xen/xen-pvh.S | 15 +-
drivers/xen/time.c | 2 +-
include/xen/events.h | 2 +
include/xen/interface/memory.h | 6 -
include/xen/xen-ops.h | 133 ++++++---
62 files changed, 880 insertions(+), 958 deletions(-)
--
2.16.4
^ permalink raw reply
* Re: [PATCH net-next v2 0/5] virtio: support packed ring
From: Michael S. Tsirkin @ 2018-08-27 14:00 UTC (permalink / raw)
To: Tiwei Bie; +Cc: virtio-dev, netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180711022711.7090-1-tiwei.bie@intel.com>
Are there still plans to test the performance with vhost pmd?
vhost doesn't seem to show a performance gain ...
On Wed, Jul 11, 2018 at 10:27:06AM +0800, Tiwei Bie wrote:
> Hello everyone,
>
> This patch set implements packed ring support in virtio driver.
>
> Some functional tests have been done with Jason's
> packed ring implementation in vhost:
>
> https://lkml.org/lkml/2018/7/3/33
>
> Both of ping and netperf worked as expected.
>
> v1 -> v2:
> - Use READ_ONCE() to read event off_wrap and flags together (Jason);
> - Add comments related to ccw (Jason);
>
> RFC (v6) -> v1:
> - Avoid extra virtio_wmb() in virtqueue_enable_cb_delayed_packed()
> when event idx is off (Jason);
> - Fix bufs calculation in virtqueue_enable_cb_delayed_packed() (Jason);
> - Test the state of the desc at used_idx instead of last_used_idx
> in virtqueue_enable_cb_delayed_packed() (Jason);
> - Save wrap counter (as part of queue state) in the return value
> of virtqueue_enable_cb_prepare_packed();
> - Refine the packed ring definitions in uapi;
> - Rebase on the net-next tree;
>
> RFC v5 -> RFC v6:
> - Avoid tracking addr/len/flags when DMA API isn't used (MST/Jason);
> - Define wrap counter as bool (Jason);
> - Use ALIGN() in vring_init_packed() (Jason);
> - Avoid using pointer to track `next` in detach_buf_packed() (Jason);
> - Add comments for barriers (Jason);
> - Don't enable RING_PACKED on ccw for now (noticed by Jason);
> - Refine the memory barrier in virtqueue_poll();
> - Add a missing memory barrier in virtqueue_enable_cb_delayed_packed();
> - Remove the hacks in virtqueue_enable_cb_prepare_packed();
>
> RFC v4 -> RFC v5:
> - Save DMA addr, etc in desc state (Jason);
> - Track used wrap counter;
>
> RFC v3 -> RFC v4:
> - Make ID allocation support out-of-order (Jason);
> - Various fixes for EVENT_IDX support;
>
> RFC v2 -> RFC v3:
> - Split into small patches (Jason);
> - Add helper virtqueue_use_indirect() (Jason);
> - Just set id for the last descriptor of a list (Jason);
> - Calculate the prev in virtqueue_add_packed() (Jason);
> - Fix/improve desc suppression code (Jason/MST);
> - Refine the code layout for XXX_split/packed and wrappers (MST);
> - Fix the comments and API in uapi (MST);
> - Remove the BUG_ON() for indirect (Jason);
> - Some other refinements and bug fixes;
>
> RFC v1 -> RFC v2:
> - Add indirect descriptor support - compile test only;
> - Add event suppression support - compile test only;
> - Move vring_packed_init() out of uapi (Jason, MST);
> - Merge two loops into one in virtqueue_add_packed() (Jason);
> - Split vring_unmap_one() for packed ring and split ring (Jason);
> - Avoid using '%' operator (Jason);
> - Rename free_head -> next_avail_idx (Jason);
> - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> - Some other refinements and bug fixes;
>
> Thanks!
>
> Tiwei Bie (5):
> virtio: add packed ring definitions
> virtio_ring: support creating packed ring
> virtio_ring: add packed ring support
> virtio_ring: add event idx support in packed ring
> virtio_ring: enable packed ring
>
> drivers/s390/virtio/virtio_ccw.c | 14 +
> drivers/virtio/virtio_ring.c | 1365 ++++++++++++++++++++++------
> include/linux/virtio_ring.h | 8 +-
> include/uapi/linux/virtio_config.h | 3 +
> include/uapi/linux/virtio_ring.h | 43 +
> 5 files changed, 1157 insertions(+), 276 deletions(-)
>
> --
> 2.18.0
^ permalink raw reply
* [PATCH v37 3/3] virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
From: Wei Wang @ 2018-08-27 1:32 UTC (permalink / raw)
To: virtio-dev, linux-kernel, virtualization, kvm, linux-mm, mst,
mhocko, akpm, dgilbert
Cc: yang.zhang.wz, riel, quan.xu0, liliang.opensource, pbonzini,
nilal, torvalds
In-Reply-To: <1535333539-32420-1-git-send-email-wei.w.wang@intel.com>
The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the
guest is using page poisoning. Guest writes to the poison_val config
field to tell host about the page poisoning value that is in use.
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
drivers/virtio/virtio_balloon.c | 10 ++++++++++
include/uapi/linux/virtio_balloon.h | 3 +++
2 files changed, 13 insertions(+)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index a185678..728ecd1 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -825,6 +825,7 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
static int virtballoon_probe(struct virtio_device *vdev)
{
struct virtio_balloon *vb;
+ __u32 poison_val;
int err;
if (!vdev->config->get) {
@@ -892,6 +893,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
vb->num_free_page_blocks = 0;
spin_lock_init(&vb->free_page_list_lock);
INIT_LIST_HEAD(&vb->free_page_list);
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+ memset(&poison_val, PAGE_POISON, sizeof(poison_val));
+ virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+ poison_val, &poison_val);
+ }
}
/*
* We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
@@ -992,6 +998,9 @@ static int virtballoon_restore(struct virtio_device *vdev)
static int virtballoon_validate(struct virtio_device *vdev)
{
+ if (!page_poisoning_enabled())
+ __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+
__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
return 0;
}
@@ -1001,6 +1010,7 @@ static unsigned int features[] = {
VIRTIO_BALLOON_F_STATS_VQ,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
VIRTIO_BALLOON_F_FREE_PAGE_HINT,
+ VIRTIO_BALLOON_F_PAGE_POISON,
};
static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 47c9eb4..a1966cd7 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -35,6 +35,7 @@
#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */
+#define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -48,6 +49,8 @@ struct virtio_balloon_config {
__u32 actual;
/* Free page report command id, readonly by guest */
__u32 free_page_report_cmd_id;
+ /* Stores PAGE_POISON if page poisoning is in use */
+ __u32 poison_val;
};
#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */
--
2.7.4
^ permalink raw reply related
* [PATCH v37 2/3] mm/page_poison: expose page_poisoning_enabled to kernel modules
From: Wei Wang @ 2018-08-27 1:32 UTC (permalink / raw)
To: virtio-dev, linux-kernel, virtualization, kvm, linux-mm, mst,
mhocko, akpm, dgilbert
Cc: yang.zhang.wz, riel, quan.xu0, liliang.opensource, pbonzini,
nilal, torvalds
In-Reply-To: <1535333539-32420-1-git-send-email-wei.w.wang@intel.com>
In some usages, e.g. virtio-balloon, a kernel module needs to know if
page poisoning is in use. This patch exposes the page_poisoning_enabled
function to kernel modules.
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/page_poison.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/mm/page_poison.c b/mm/page_poison.c
index aa2b3d3..830f604 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf)
}
early_param("page_poison", early_page_poison_param);
+/**
+ * page_poisoning_enabled - check if page poisoning is enabled
+ *
+ * Return true if page poisoning is enabled, or false if not.
+ */
bool page_poisoning_enabled(void)
{
/*
@@ -29,6 +34,7 @@ bool page_poisoning_enabled(void)
(!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
debug_pagealloc_enabled()));
}
+EXPORT_SYMBOL_GPL(page_poisoning_enabled);
static void poison_page(struct page *page)
{
--
2.7.4
^ permalink raw reply related
* [PATCH v37 1/3] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
From: Wei Wang @ 2018-08-27 1:32 UTC (permalink / raw)
To: virtio-dev, linux-kernel, virtualization, kvm, linux-mm, mst,
mhocko, akpm, dgilbert
Cc: yang.zhang.wz, riel, quan.xu0, liliang.opensource, pbonzini,
nilal, torvalds
In-Reply-To: <1535333539-32420-1-git-send-email-wei.w.wang@intel.com>
Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_HINT feature indicates the
support of reporting hints of guest free pages to host via virtio-balloon.
Currently, only free page blocks of MAX_ORDER - 1 are reported. They are
obtained one by one from the mm free list via the regular allocation
function.
Host requests the guest to report free page hints by sending a new cmd id
to the guest via the free_page_report_cmd_id configuration register. When
the guest starts to report, it first sends a start cmd to host via the
free page vq, which acks to host the cmd id received. When the guest
finishes reporting free pages, a stop cmd is sent to host via the vq.
Host may also send a stop cmd id to the guest to stop the reporting.
VIRTIO_BALLOON_CMD_ID_STOP: Host sends this cmd to stop the guest
reporting.
VIRTIO_BALLOON_CMD_ID_DONE: Host sends this cmd to tell the guest that
the reported pages are ready to be freed.
Why does the guest free the reported pages only when host tells it that they
are ready to be freed?
This is because freeing pages appears to be expensive for live migration.
free_pages() dirties memory very quickly and makes the live migration not
converge in some cases. So it is good to delay the free_page operation
until the migration is done, when host sends a command to guest about that.
Why do we need the new VIRTIO_BALLOON_CMD_ID_DONE, instead of reusing
VIRTIO_BALLOON_CMD_ID_STOP?
This is because live migration is usually done in several rounds. At the
end of each round, host needs to send a VIRTIO_BALLOON_CMD_ID_STOP cmd to
the guest to stop (or say pause) the reporting. The guest resumes the
reporting when it receives a new command id at the beginning of the next
round. So we need a new cmd id to distinguish between "stop reporting" and
"ready to free the reported pages".
TODO:
- Add a batch page allocation API to amortize the allocation overhead.
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Liang Li <liang.z.li@intel.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
drivers/virtio/virtio_balloon.c | 364 ++++++++++++++++++++++++++++++++----
include/uapi/linux/virtio_balloon.h | 5 +
2 files changed, 336 insertions(+), 33 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index d1c1f62..a185678 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -41,13 +41,34 @@
#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
+ __GFP_NOMEMALLOC)
+/* The order of free page blocks to report to host */
+#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
+/* The size of a free page block in bytes */
+#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
+ (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
+
#ifdef CONFIG_BALLOON_COMPACTION
static struct vfsmount *balloon_mnt;
#endif
+enum virtio_balloon_vq {
+ VIRTIO_BALLOON_VQ_INFLATE,
+ VIRTIO_BALLOON_VQ_DEFLATE,
+ VIRTIO_BALLOON_VQ_STATS,
+ VIRTIO_BALLOON_VQ_FREE_PAGE,
+ VIRTIO_BALLOON_VQ_MAX
+};
+
struct virtio_balloon {
struct virtio_device *vdev;
- struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+ struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
+
+ /* Balloon's own wq for cpu-intensive work items */
+ struct workqueue_struct *balloon_wq;
+ /* The free page reporting work item submitted to the balloon wq */
+ struct work_struct report_free_page_work;
/* The balloon servicing is delegated to a freezable workqueue. */
struct work_struct update_balloon_stats_work;
@@ -57,6 +78,18 @@ struct virtio_balloon {
spinlock_t stop_update_lock;
bool stop_update;
+ /* The list of allocated free pages, waiting to be given back to mm */
+ struct list_head free_page_list;
+ spinlock_t free_page_list_lock;
+ /* The number of free page blocks on the above list */
+ unsigned long num_free_page_blocks;
+ /* The cmd id received from host */
+ u32 cmd_id_received;
+ /* The cmd id that is actively in use */
+ __virtio32 cmd_id_active;
+ /* Buffer to store the stop sign */
+ __virtio32 cmd_id_stop;
+
/* Waiting for host to ack the pages we released. */
wait_queue_head_t acked;
@@ -320,17 +353,6 @@ static void stats_handle_request(struct virtio_balloon *vb)
virtqueue_kick(vq);
}
-static void virtballoon_changed(struct virtio_device *vdev)
-{
- struct virtio_balloon *vb = vdev->priv;
- unsigned long flags;
-
- spin_lock_irqsave(&vb->stop_update_lock, flags);
- if (!vb->stop_update)
- queue_work(system_freezable_wq, &vb->update_balloon_size_work);
- spin_unlock_irqrestore(&vb->stop_update_lock, flags);
-}
-
static inline s64 towards_target(struct virtio_balloon *vb)
{
s64 target;
@@ -347,6 +369,60 @@ static inline s64 towards_target(struct virtio_balloon *vb)
return target - vb->num_pages;
}
+/* Gives back @num_to_return blocks of free pages to mm. */
+static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
+ unsigned long num_to_return)
+{
+ struct page *page;
+ unsigned long num_returned;
+
+ spin_lock_irq(&vb->free_page_list_lock);
+ for (num_returned = 0; num_returned < num_to_return; num_returned++) {
+ page = balloon_page_pop(&vb->free_page_list);
+ if (!page)
+ break;
+ free_pages((unsigned long)page_address(page),
+ VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ }
+ vb->num_free_page_blocks -= num_returned;
+ spin_unlock_irq(&vb->free_page_list_lock);
+
+ return num_returned;
+}
+
+static void virtballoon_changed(struct virtio_device *vdev)
+{
+ struct virtio_balloon *vb = vdev->priv;
+ unsigned long flags;
+ s64 diff = towards_target(vb);
+
+ if (diff) {
+ spin_lock_irqsave(&vb->stop_update_lock, flags);
+ if (!vb->stop_update)
+ queue_work(system_freezable_wq,
+ &vb->update_balloon_size_work);
+ spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+ }
+
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+ virtio_cread(vdev, struct virtio_balloon_config,
+ free_page_report_cmd_id, &vb->cmd_id_received);
+ if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
+ /* Pass ULONG_MAX to give back all the free pages */
+ return_free_pages_to_mm(vb, ULONG_MAX);
+ } else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
+ vb->cmd_id_received !=
+ virtio32_to_cpu(vdev, vb->cmd_id_active)) {
+ spin_lock_irqsave(&vb->stop_update_lock, flags);
+ if (!vb->stop_update) {
+ queue_work(vb->balloon_wq,
+ &vb->report_free_page_work);
+ }
+ spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+ }
+ }
+}
+
static void update_balloon_size(struct virtio_balloon *vb)
{
u32 actual = vb->num_pages;
@@ -389,26 +465,44 @@ static void update_balloon_size_func(struct work_struct *work)
static int init_vqs(struct virtio_balloon *vb)
{
- struct virtqueue *vqs[3];
- vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
- static const char * const names[] = { "inflate", "deflate", "stats" };
- int err, nvqs;
+ struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
+ vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
+ const char *names[VIRTIO_BALLOON_VQ_MAX];
+ int err;
/*
- * We expect two virtqueues: inflate and deflate, and
- * optionally stat.
+ * Inflateq and deflateq are used unconditionally. The names[]
+ * will be NULL if the related feature is not enabled, which will
+ * cause no allocation for the corresponding virtqueue in find_vqs.
*/
- nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
- err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
+ callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
+ names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
+ callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
+ names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
+ names[VIRTIO_BALLOON_VQ_STATS] = NULL;
+ names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+ names[VIRTIO_BALLOON_VQ_STATS] = "stats";
+ callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
+ }
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+ names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
+ callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+ }
+
+ err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
+ vqs, callbacks, names, NULL, NULL);
if (err)
return err;
- vb->inflate_vq = vqs[0];
- vb->deflate_vq = vqs[1];
+ vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
+ vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
struct scatterlist sg;
unsigned int num_stats;
- vb->stats_vq = vqs[2];
+ vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
/*
* Prime this virtqueue with one buffer so the hypervisor can
@@ -426,9 +520,145 @@ static int init_vqs(struct virtio_balloon *vb)
}
virtqueue_kick(vb->stats_vq);
}
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+ vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
+
+ return 0;
+}
+
+static int send_cmd_id_start(struct virtio_balloon *vb)
+{
+ struct scatterlist sg;
+ struct virtqueue *vq = vb->free_page_vq;
+ int err, unused;
+
+ /* Detach all the used buffers from the vq */
+ while (virtqueue_get_buf(vq, &unused))
+ ;
+
+ vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received);
+ sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
+ err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
+ if (!err)
+ virtqueue_kick(vq);
+ return err;
+}
+
+static int send_cmd_id_stop(struct virtio_balloon *vb)
+{
+ struct scatterlist sg;
+ struct virtqueue *vq = vb->free_page_vq;
+ int err, unused;
+
+ /* Detach all the used buffers from the vq */
+ while (virtqueue_get_buf(vq, &unused))
+ ;
+
+ sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
+ err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
+ if (!err)
+ virtqueue_kick(vq);
+ return err;
+}
+
+static int get_free_page_and_send(struct virtio_balloon *vb)
+{
+ struct virtqueue *vq = vb->free_page_vq;
+ struct page *page;
+ struct scatterlist sg;
+ int err, unused;
+ void *p;
+
+ /* Detach all the used buffers from the vq */
+ while (virtqueue_get_buf(vq, &unused))
+ ;
+
+ page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
+ VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ /*
+ * When the allocation returns NULL, it indicates that we have got all
+ * the possible free pages, so return -EINTR to stop.
+ */
+ if (!page)
+ return -EINTR;
+
+ p = page_address(page);
+ sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE);
+ /* There is always 1 entry reserved for the cmd id to use. */
+ if (vq->num_free > 1) {
+ err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
+ if (unlikely(err)) {
+ free_pages((unsigned long)p,
+ VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ return err;
+ }
+ virtqueue_kick(vq);
+ spin_lock_irq(&vb->free_page_list_lock);
+ balloon_page_push(&vb->free_page_list, page);
+ vb->num_free_page_blocks++;
+ spin_unlock_irq(&vb->free_page_list_lock);
+ } else {
+ /*
+ * The vq has no available entry to add this page block, so
+ * just free it.
+ */
+ free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ }
+
return 0;
}
+static int send_free_pages(struct virtio_balloon *vb)
+{
+ int err;
+ u32 cmd_id_active;
+
+ while (1) {
+ /*
+ * If a stop id or a new cmd id was just received from host,
+ * stop the reporting.
+ */
+ cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
+ if (cmd_id_active != vb->cmd_id_received)
+ break;
+
+ /*
+ * The free page blocks are allocated and sent to host one by
+ * one.
+ */
+ err = get_free_page_and_send(vb);
+ if (err == -EINTR)
+ break;
+ else if (unlikely(err))
+ return err;
+ }
+
+ return 0;
+}
+
+static void report_free_page_func(struct work_struct *work)
+{
+ int err;
+ struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
+ report_free_page_work);
+ struct device *dev = &vb->vdev->dev;
+
+ /* Start by sending the received cmd id to host with an outbuf. */
+ err = send_cmd_id_start(vb);
+ if (unlikely(err))
+ dev_err(dev, "Failed to send a start id, err = %d\n", err);
+
+ err = send_free_pages(vb);
+ if (unlikely(err))
+ dev_err(dev, "Failed to send a free page, err = %d\n", err);
+
+ /* End by sending a stop id to host with an outbuf. */
+ err = send_cmd_id_stop(vb);
+ if (unlikely(err))
+ dev_err(dev, "Failed to send a stop id, err = %d\n", err);
+}
+
#ifdef CONFIG_BALLOON_COMPACTION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -512,14 +742,23 @@ static struct file_system_type balloon_fs = {
#endif /* CONFIG_BALLOON_COMPACTION */
-static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
- struct shrink_control *sc)
+static unsigned long shrink_free_pages(struct virtio_balloon *vb,
+ unsigned long pages_to_free)
{
- unsigned long pages_to_free, pages_freed = 0;
- struct virtio_balloon *vb = container_of(shrinker,
- struct virtio_balloon, shrinker);
+ unsigned long blocks_to_free, blocks_freed;
- pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+ pages_to_free = round_up(pages_to_free,
+ 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+ blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);
+
+ return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+}
+
+static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
+ unsigned long pages_to_free)
+{
+ unsigned long pages_freed = 0;
/*
* One invocation of leak_balloon can deflate at most
@@ -527,12 +766,33 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
* multiple times to deflate pages till reaching pages_to_free.
*/
while (vb->num_pages && pages_to_free) {
+ pages_freed += leak_balloon(vb, pages_to_free) /
+ VIRTIO_BALLOON_PAGES_PER_PAGE;
pages_to_free -= pages_freed;
- pages_freed += leak_balloon(vb, pages_to_free);
}
update_balloon_size(vb);
- return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE;
+ return pages_freed;
+}
+
+static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ unsigned long pages_to_free, pages_freed = 0;
+ struct virtio_balloon *vb = container_of(shrinker,
+ struct virtio_balloon, shrinker);
+
+ pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+ pages_freed = shrink_free_pages(vb, pages_to_free);
+
+ if (pages_freed >= pages_to_free)
+ return pages_freed;
+
+ pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
+
+ return pages_freed;
}
static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
@@ -540,8 +800,12 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
{
struct virtio_balloon *vb = container_of(shrinker,
struct virtio_balloon, shrinker);
+ unsigned long count;
- return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+ count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+ count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+
+ return count;
}
static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -604,6 +868,31 @@ static int virtballoon_probe(struct virtio_device *vdev)
}
vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
#endif
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+ /*
+ * There is always one entry reserved for cmd id, so the ring
+ * size needs to be at least two to report free page hints.
+ */
+ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
+ err = -ENOSPC;
+ goto out_del_vqs;
+ }
+ vb->balloon_wq = alloc_workqueue("balloon-wq",
+ WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
+ if (!vb->balloon_wq) {
+ err = -ENOMEM;
+ goto out_del_vqs;
+ }
+ INIT_WORK(&vb->report_free_page_work, report_free_page_func);
+ vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP;
+ vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
+ VIRTIO_BALLOON_CMD_ID_STOP);
+ vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
+ VIRTIO_BALLOON_CMD_ID_STOP);
+ vb->num_free_page_blocks = 0;
+ spin_lock_init(&vb->free_page_list_lock);
+ INIT_LIST_HEAD(&vb->free_page_list);
+ }
/*
* We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
* shrinker needs to be registered to relieve memory pressure.
@@ -611,7 +900,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
err = virtio_balloon_register_shrinker(vb);
if (err)
- goto out_del_vqs;
+ goto out_del_balloon_wq;
}
virtio_device_ready(vdev);
@@ -619,6 +908,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
virtballoon_changed(vdev);
return 0;
+out_del_balloon_wq:
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+ destroy_workqueue(vb->balloon_wq);
out_del_vqs:
vdev->config->del_vqs(vdev);
out_free_vb:
@@ -652,6 +944,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
cancel_work_sync(&vb->update_balloon_size_work);
cancel_work_sync(&vb->update_balloon_stats_work);
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+ cancel_work_sync(&vb->report_free_page_work);
+ destroy_workqueue(vb->balloon_wq);
+ }
+
remove_common(vb);
#ifdef CONFIG_BALLOON_COMPACTION
if (vb->vb_dev_info.inode)
@@ -703,6 +1000,7 @@ static unsigned int features[] = {
VIRTIO_BALLOON_F_MUST_TELL_HOST,
VIRTIO_BALLOON_F_STATS_VQ,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+ VIRTIO_BALLOON_F_FREE_PAGE_HINT,
};
static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 13b8cb5..47c9eb4 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,15 +34,20 @@
#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
+#define VIRTIO_BALLOON_CMD_ID_STOP 0
+#define VIRTIO_BALLOON_CMD_ID_DONE 1
struct virtio_balloon_config {
/* Number of pages host wants Guest to give up. */
__u32 num_pages;
/* Number of pages we've actually got in balloon. */
__u32 actual;
+ /* Free page report command id, readonly by guest */
+ __u32 free_page_report_cmd_id;
};
#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */
--
2.7.4
^ permalink raw reply related
* [PATCH v37 0/3] Virtio-balloon: support free page reporting
From: Wei Wang @ 2018-08-27 1:32 UTC (permalink / raw)
To: virtio-dev, linux-kernel, virtualization, kvm, linux-mm, mst,
mhocko, akpm, dgilbert
Cc: yang.zhang.wz, riel, quan.xu0, liliang.opensource, pbonzini,
nilal, torvalds
The new feature, VIRTIO_BALLOON_F_FREE_PAGE_HINT, implemented by this
series enables the virtio-balloon driver to report hints of guest free
pages to host. It can be used to accelerate virtual machine (VM) live
migration. Here is an introduction of this usage:
Live migration needs to transfer the VM's memory from the source machine
to the destination round by round. For the 1st round, all the VM's memory
is transferred. From the 2nd round, only the pieces of memory that were
written by the guest (after the 1st round) are transferred. One method
that is popularly used by the hypervisor to track which part of memory is
written is to have the hypervisor write-protect all the guest memory.
This feature enables the optimization by skipping the transfer of guest
free pages during VM live migration. It is not a concern if the memory
pages are used after they are given to the hypervisor as a hint of the
free pages, because they will be tracked by the hypervisor and transferred
in the subsequent round if they are used and written.
* Tests
1 Test Environment
Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz
Migration setup: migrate_set_speed 100G, migrate_set_downtime 400ms
2 Test Results (results are averaged over several repeated runs)
2.1 Guest setup: 8G RAM, 4 vCPU
2.1.1 Idle guest live migration time
Optimization v.s. Legacy = 620ms vs 2970ms
--> ~79% reduction
2.1.2 Guest live migration with Linux compilation workload
(i.e. make bzImage -j4) running
1) Live Migration Time:
Optimization v.s. Legacy = 2273ms v.s. 4502ms
--> ~50% reduction
2) Linux Compilation Time:
Optimization v.s. Legacy = 8min42s v.s. 8min43s
--> no obvious difference
2.2 Guest setup: 128G RAM, 4 vCPU
2.2.1 Idle guest live migration time
Optimization v.s. Legacy = 5294ms vs 41651ms
--> ~87% reduction
2.2.2 Guest live migration with Linux compilation workload
1) Live Migration Time:
Optimization v.s. Legacy = 8816ms v.s. 54201ms
--> 84% reduction
2) Linux Compilation Time:
Optimization v.s. Legacy = 8min30s v.s. 8min36s
--> no obvious difference
ChangeLog:
v36->v37:
- free the reported pages to mm when a DONE cmd is received from the host.
Please see patch 1's commit log for the reasons and detailed
explanations.
For ChangeLogs from v22 to v36, please refer to
https://lkml.org/lkml/2018/7/20/199
For ChangeLogs before v21, please refer to
https://lwn.net/Articles/743660/
Wei Wang (3):
virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
mm/page_poison: expose page_poisoning_enabled to kernel modules
virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
drivers/virtio/virtio_balloon.c | 374 ++++++++++++++++++++++++++++++++----
include/uapi/linux/virtio_balloon.h | 8 +
mm/page_poison.c | 6 +
3 files changed, 355 insertions(+), 33 deletions(-)
--
2.7.4
^ permalink raw reply
* Re: [PATCH net] vhost: correctly check the iova range when waking virtqueue
From: David Miller @ 2018-08-26 0:40 UTC (permalink / raw)
To: jasowang; +Cc: kvm, mst, netdev, linux-kernel, virtualization
In-Reply-To: <20180824085313.21798-1-jasowang@redhat.com>
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 24 Aug 2018 16:53:13 +0800
> We don't wake up the virtqueue if the first byte of the pending iova range
> is the last byte of the range that was just updated. This leads the
> virtqueue to wait for an IOTLB update forever. Fix this by correcting the
> check and waking up the virtqueue in this case.
>
> Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
> Reported-by: Peter Xu <peterx@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> The patch is needed for -stable.
Applied and queued up for -stable, thanks Jason.
^ permalink raw reply
* Re: [PATCH v2 10/11] x86/paravirt: move the Xen-only pv_mmu_ops under the PARAVIRT_XXL umbrella
From: Juergen Gross @ 2018-08-24 14:15 UTC (permalink / raw)
To: Peter Zijlstra
Cc: rusty, x86, linux-kernel, virtualization, mingo, tglx, hpa,
xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180824141218.GP24124@hirez.programming.kicks-ass.net>
On 24/08/18 16:12, Peter Zijlstra wrote:
> On Mon, Aug 13, 2018 at 09:37:38AM +0200, Juergen Gross wrote:
>> struct pv_mmu_ops {
>> + /* TLB operations */
>> + void (*flush_tlb_user)(void);
>> + void (*flush_tlb_kernel)(void);
>> + void (*flush_tlb_one_user)(unsigned long addr);
>> + void (*flush_tlb_others)(const struct cpumask *cpus,
>> + const struct flush_tlb_info *info);
>> +
>> + /* Hook for intercepting the destruction of an mm_struct. */
>> + void (*exit_mmap)(struct mm_struct *mm);
>
> Right, so I just wrecked that for you by adding a new:
> tlb_remove_table virt function. But I don't suppose that's a difficult
> thing to fix up.
Right. This will stay outside of XXL, I think. :-)
Juergen
^ permalink raw reply
* Re: [PATCH v2 09/11] x86/paravirt: move the Xen-only pv_irq_ops under the PARAVIRT_XXL umbrella
From: Juergen Gross @ 2018-08-24 14:13 UTC (permalink / raw)
To: Peter Zijlstra
Cc: rusty, x86, linux-kernel, virtualization, mingo, tglx, hpa,
xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180824141045.GO24124@hirez.programming.kicks-ass.net>
On 24/08/18 16:10, Peter Zijlstra wrote:
> On Mon, Aug 13, 2018 at 09:37:37AM +0200, Juergen Gross wrote:
>> Some of the paravirt ops defined in pv_irq_ops are for Xen PV guests
>> only. Define them only if CONFIG_PARAVIRT_XXL is set.
>> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
>> index e652ec27d945..ae53ee36d8fb 100644
>> --- a/arch/x86/include/asm/paravirt_types.h
>> +++ b/arch/x86/include/asm/paravirt_types.h
>> @@ -197,8 +197,10 @@ struct pv_irq_ops {
>> struct paravirt_callee_save irq_disable;
>> struct paravirt_callee_save irq_enable;
>>
>> +#ifdef CONFIG_PARAVIRT_XXL
>> void (*safe_halt)(void);
>> void (*halt)(void);
>> +#endif
>
> that makes me sad... but it appears VSMP also uses them. Can't you
> simply make VSMP also select XXL, I don't think that's used quite as
> much as Xen is :-)
>
Sure, why not?
Any objections?
Juergen
^ permalink raw reply
* Re: [PATCH v2 00/11] x86/paravirt: several cleanups
From: Peter Zijlstra @ 2018-08-24 14:13 UTC (permalink / raw)
To: Juergen Gross
Cc: rusty, x86, linux-kernel, virtualization, mingo, tglx, hpa,
xen-devel, akataria, boris.ostrovsky
In-Reply-To: <45bfe8ab-683f-ab79-e3c6-c0a707b667c2@suse.com>
On Fri, Aug 24, 2018 at 03:52:55PM +0200, Juergen Gross wrote:
> Ping?
Looking good; although I messed it up a little bit by adding a new
paravirt function.
Thanks for doing this!
^ permalink raw reply
* Re: [PATCH v2 10/11] x86/paravirt: move the Xen-only pv_mmu_ops under the PARAVIRT_XXL umbrella
From: Peter Zijlstra @ 2018-08-24 14:12 UTC (permalink / raw)
To: Juergen Gross
Cc: rusty, x86, linux-kernel, virtualization, mingo, tglx, hpa,
xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180813073739.26108-11-jgross@suse.com>
On Mon, Aug 13, 2018 at 09:37:38AM +0200, Juergen Gross wrote:
> struct pv_mmu_ops {
> + /* TLB operations */
> + void (*flush_tlb_user)(void);
> + void (*flush_tlb_kernel)(void);
> + void (*flush_tlb_one_user)(unsigned long addr);
> + void (*flush_tlb_others)(const struct cpumask *cpus,
> + const struct flush_tlb_info *info);
> +
> + /* Hook for intercepting the destruction of an mm_struct. */
> + void (*exit_mmap)(struct mm_struct *mm);
Right, so I just wrecked that for you by adding a new:
tlb_remove_table virt function. But I don't suppose that's a difficult
thing to fix up.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox