Linux virtualization list
 help / color / mirror / Atom feed
* [PATCH v3 07/15] x86/paravirt: remove clobbers from struct paravirt_patch_site
From: Juergen Gross @ 2018-08-28  7:40 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86, virtualization
  Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>

There is no need any longer to store the clobbers in struct
paravirt_patch_site. Remove clobbers from the struct and from the
related macros.

While at it fix some lines longer than 80 characters.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/paravirt.h       | 33 +++++++++++++++------------------
 arch/x86/include/asm/paravirt_types.h |  1 -
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index e375d4266b53..e1364cb40ce5 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -827,7 +827,7 @@ extern void default_banner(void);
 
 #else  /* __ASSEMBLY__ */
 
-#define _PVSITE(ptype, clobbers, ops, word, algn)	\
+#define _PVSITE(ptype, ops, word, algn)		\
 771:;						\
 	ops;					\
 772:;						\
@@ -836,7 +836,6 @@ extern void default_banner(void);
 	 word 771b;				\
 	 .byte ptype;				\
 	 .byte 772b-771b;			\
-	 .short clobbers;			\
 	.popsection
 
 
@@ -869,7 +868,7 @@ extern void default_banner(void);
 	COND_POP(set, CLBR_RAX, rax)
 
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
-#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
+#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops, .quad, 8)
 #define PARA_INDIRECT(addr)	*addr(%rip)
 #else
 #define PV_SAVE_REGS(set)			\
@@ -884,26 +883,26 @@ extern void default_banner(void);
 	COND_POP(set, CLBR_EAX, eax)
 
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
-#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
+#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops, .long, 4)
 #define PARA_INDIRECT(addr)	*%cs:addr
 #endif
 
 #define INTERRUPT_RETURN						\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
-		  ANNOTATE_RETPOLINE_SAFE;					\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret),			\
+		  ANNOTATE_RETPOLINE_SAFE;				\
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
 
 #define DISABLE_INTERRUPTS(clobbers)					\
-	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable),		\
 		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
-		  ANNOTATE_RETPOLINE_SAFE;					\
+		  ANNOTATE_RETPOLINE_SAFE;				\
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
 #define ENABLE_INTERRUPTS(clobbers)					\
-	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable),		\
 		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
-		  ANNOTATE_RETPOLINE_SAFE;					\
+		  ANNOTATE_RETPOLINE_SAFE;				\
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
@@ -921,8 +920,7 @@ extern void default_banner(void);
  * inlined, or the swapgs instruction must be trapped and emulated.
  */
 #define SWAPGS_UNSAFE_STACK						\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
-		  swapgs)
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), swapgs)
 
 /*
  * Note: swapgs is very special, and in practise is either going to be
@@ -931,8 +929,8 @@ extern void default_banner(void);
  * it.
  */
 #define SWAPGS								\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
-		  ANNOTATE_RETPOLINE_SAFE;					\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs),		\
+		  ANNOTATE_RETPOLINE_SAFE;				\
 		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);		\
 		 )
 
@@ -942,15 +940,14 @@ extern void default_banner(void);
 
 #define USERGS_SYSRET64							\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
-		  CLBR_NONE,						\
-		  ANNOTATE_RETPOLINE_SAFE;					\
+		  ANNOTATE_RETPOLINE_SAFE;				\
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
 
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(clobbers)                                        \
-	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl),	    \
 		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
-		  ANNOTATE_RETPOLINE_SAFE;				    \
+		  ANNOTATE_RETPOLINE_SAFE;			    \
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 #endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 225d871373ed..b2220536a7d4 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -681,7 +681,6 @@ struct paravirt_patch_site {
 	u8 *instr; 		/* original instructions */
 	u8 instrtype;		/* type of this instruction */
 	u8 len;			/* length of original instruction */
-	u16 clobbers;		/* what registers you may clobber */
 };
 
 extern struct paravirt_patch_site __parainstructions[],
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 06/15] x86/paravirt: remove clobbers parameter from paravirt patch functions
From: Juergen Gross @ 2018-08-28  7:40 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86, virtualization
  Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>

The clobbers parameter from paravirt_patch_default() et al isn't used
any longer. Remove it.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/paravirt_types.h |  7 +++----
 arch/x86/kernel/alternative.c         |  2 +-
 arch/x86/kernel/paravirt.c            | 14 +++++---------
 arch/x86/kernel/paravirt_patch_32.c   |  5 ++---
 arch/x86/kernel/paravirt_patch_64.c   |  5 ++---
 arch/x86/kernel/vsmp_64.c             |  6 +++---
 6 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7d13197d760b..225d871373ed 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -85,7 +85,7 @@ struct pv_init_ops {
 	 * the number of bytes of code generated, as we nop pad the
 	 * rest in generic code.
 	 */
-	unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
+	unsigned (*patch)(u8 type, void *insnbuf,
 			  unsigned long addr, unsigned len);
 } __no_randomize_layout;
 
@@ -373,14 +373,13 @@ extern struct pv_lock_ops pv_lock_ops;
 
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
+unsigned paravirt_patch_default(u8 type, void *insnbuf,
 				unsigned long addr, unsigned len);
 
 unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
 			      const char *start, const char *end);
 
-unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
-		      unsigned long addr, unsigned len);
+unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
 
 int paravirt_disable_iospace(void);
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 014f214da581..7985c20601b4 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -594,7 +594,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
 		BUG_ON(p->len > MAX_PATCH_LEN);
 		/* prep the buffer with the original instructions */
 		memcpy(insnbuf, p->instr, p->len);
-		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
+		used = pv_init_ops.patch(p->instrtype, insnbuf,
 					 (unsigned long)p->instr, p->len);
 
 		BUG_ON(used > p->len);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1abdbde35049..287d34513f6a 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -81,10 +81,8 @@ struct branch {
 	u32 delta;
 } __attribute__((packed));
 
-static unsigned paravirt_patch_call(void *insnbuf,
-				    const void *target, u16 tgt_clobbers,
-				    unsigned long addr, u16 site_clobbers,
-				    unsigned len)
+static unsigned paravirt_patch_call(void *insnbuf, const void *target,
+				    unsigned long addr, unsigned len)
 {
 	struct branch *b = insnbuf;
 	unsigned long delta = (unsigned long)target - (addr+5);
@@ -149,7 +147,7 @@ static void *get_call_destination(u8 type)
 	return *((void **)&tmpl + type);
 }
 
-unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
+unsigned paravirt_patch_default(u8 type, void *insnbuf,
 				unsigned long addr, unsigned len)
 {
 	void *opfunc = get_call_destination(type);
@@ -172,10 +170,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
-		/* Otherwise call the function; assume target could
-		   clobber any caller-save reg */
-		ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY,
-					  addr, clobbers, len);
+		/* Otherwise call the function. */
+		ret = paravirt_patch_call(insnbuf, opfunc, addr, len);
 
 	return ret;
 }
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 758e69d72ebf..e5c3a438149e 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -30,8 +30,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
 extern bool pv_is_native_spin_unlock(void);
 extern bool pv_is_native_vcpu_is_preempted(void);
 
-unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
-		      unsigned long addr, unsigned len)
+unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 	const unsigned char *start, *end;
 	unsigned ret;
@@ -70,7 +69,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 
 	default:
 patch_default: __maybe_unused
-		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
+		ret = paravirt_patch_default(type, ibuf, addr, len);
 		break;
 
 patch_site:
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 9cb98f7b07c9..835f1985a115 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -38,8 +38,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
 extern bool pv_is_native_spin_unlock(void);
 extern bool pv_is_native_vcpu_is_preempted(void);
 
-unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
-		      unsigned long addr, unsigned len)
+unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 	const unsigned char *start, *end;
 	unsigned ret;
@@ -80,7 +79,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 
 	default:
 patch_default: __maybe_unused
-		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
+		ret = paravirt_patch_default(type, ibuf, addr, len);
 		break;
 
 patch_site:
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 44685fb2a192..f194e5e1e95c 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -69,7 +69,7 @@ asmlinkage __visible void vsmp_irq_enable(void)
 }
 PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
 
-static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf,
+static unsigned __init vsmp_patch(u8 type, void *ibuf,
 				  unsigned long addr, unsigned len)
 {
 	switch (type) {
@@ -77,9 +77,9 @@ static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf,
 	case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
 	case PARAVIRT_PATCH(pv_irq_ops.save_fl):
 	case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
-		return paravirt_patch_default(type, clobbers, ibuf, addr, len);
+		return paravirt_patch_default(type, ibuf, addr, len);
 	default:
-		return native_patch(type, clobbers, ibuf, addr, len);
+		return native_patch(type, ibuf, addr, len);
 	}
 
 }
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 05/15] x86/paravirt: make paravirt_patch_call() and paravirt_patch_jmp() static
From: Juergen Gross @ 2018-08-28  7:40 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86, virtualization
  Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>

paravirt_patch_call() and paravirt_patch_jmp() are used in paravirt.c
only. Convert them to static.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/paravirt_types.h |  6 ------
 arch/x86/kernel/paravirt.c            | 12 ++++++------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 4b75acc23b30..7d13197d760b 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -373,12 +373,6 @@ extern struct pv_lock_ops pv_lock_ops;
 
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_call(void *insnbuf,
-			     const void *target, u16 tgt_clobbers,
-			     unsigned long addr, u16 site_clobbers,
-			     unsigned len);
-unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
-			    unsigned long addr, unsigned len);
 unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 				unsigned long addr, unsigned len);
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index afdb303285f8..1abdbde35049 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -81,10 +81,10 @@ struct branch {
 	u32 delta;
 } __attribute__((packed));
 
-unsigned paravirt_patch_call(void *insnbuf,
-			     const void *target, u16 tgt_clobbers,
-			     unsigned long addr, u16 site_clobbers,
-			     unsigned len)
+static unsigned paravirt_patch_call(void *insnbuf,
+				    const void *target, u16 tgt_clobbers,
+				    unsigned long addr, u16 site_clobbers,
+				    unsigned len)
 {
 	struct branch *b = insnbuf;
 	unsigned long delta = (unsigned long)target - (addr+5);
@@ -103,8 +103,8 @@ unsigned paravirt_patch_call(void *insnbuf,
 	return 5;
 }
 
-unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
-			    unsigned long addr, unsigned len)
+static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
+				   unsigned long addr, unsigned len)
 {
 	struct branch *b = insnbuf;
 	unsigned long delta = (unsigned long)target - (addr+5);
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 04/15] xen: add SPDX identifier in arch/x86/xen files
From: Juergen Gross @ 2018-08-28  7:40 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86, virtualization
  Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/xen/efi.c                 | 14 +-------------
 arch/x86/xen/enlighten.c           |  2 ++
 arch/x86/xen/enlighten_hvm.c       |  2 ++
 arch/x86/xen/grant-table.c         | 25 +------------------------
 arch/x86/xen/mmu.c                 |  2 ++
 arch/x86/xen/mmu_pv.c              |  2 ++
 arch/x86/xen/p2m.c                 |  2 ++
 arch/x86/xen/pci-swiotlb-xen.c     |  2 ++
 arch/x86/xen/platform-pci-unplug.c | 16 ++--------------
 arch/x86/xen/vdso.h                |  2 ++
 arch/x86/xen/xen-pvh.S             | 15 ++-------------
 11 files changed, 20 insertions(+), 64 deletions(-)

diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 1804b27f9632..1fbb629a9d78 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2014 Oracle Co., Daniel Kiper
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/bitops.h>
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2eeddd814653..749fb4b73eda 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
 #include <linux/bootmem.h>
 #endif
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 19c1ff542387..0e75642d42a3 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
 #include <linux/acpi.h>
 #include <linux/cpu.h>
 #include <linux/kexec.h>
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 92ccc718152d..ecb0d5450334 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
 /******************************************************************************
  * grant_table.c
  * x86 specific part
@@ -8,30 +9,6 @@
  * Copyright (c) 2004-2005, K A Fraser
  * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
  *                    VA Linux Systems Japan. Split out x86 specific part.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
  */
 
 #include <linux/sched.h>
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e0e13fe16d37..60e9c37fd79f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
 #include <linux/pfn.h>
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index bdfac00b1ec8..1e9098f53967 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
 /*
  * Xen mmu operations
  *
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 159a897151d6..d6d74efd8912 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
 /*
  * Xen leaves the responsibility for maintaining p2m mappings to the
  * guests themselves, but it must also access and update the p2m array
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 37c6056a7bba..33293ce01d8d 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
 /* Glue code to lib/swiotlb-xen.c */
 
 #include <linux/dma-mapping.h>
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 3957946a6cfe..2e794ac9d8e8 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
 /******************************************************************************
  * platform-pci-unplug.c
  *
  * Xen platform PCI device driver
  * Copyright (c) 2010, Citrix
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/xen/vdso.h b/arch/x86/xen/vdso.h
index 861fedfe5230..873c54c488fe 100644
--- a/arch/x86/xen/vdso.h
+++ b/arch/x86/xen/vdso.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
 /* Bit used for the pseudo-hwcap for non-negative segments.  We use
    bit 1 to avoid bugs in some versions of glibc when bit 0 is
    used; the choice is otherwise arbitrary. */
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
index ca2d3b2bf2af..b0e471506cd8 100644
--- a/arch/x86/xen/xen-pvh.S
+++ b/arch/x86/xen/xen-pvh.S
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
 /*
  * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 	.code32
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 03/15] xen: link platform-pci-unplug.o only if CONFIG_XEN_PVHVM
From: Juergen Gross @ 2018-08-28  7:40 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86, virtualization
  Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>

Instead of using one large #ifdef CONFIG_XEN_PVHVM in
arch/x86/xen/platform-pci-unplug.c add the object file depending on
CONFIG_XEN_PVHVM being set.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/xen/Makefile              | 2 +-
 arch/x86/xen/platform-pci-unplug.c | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index a964f307a266..dd2550d33b38 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -19,11 +19,11 @@ obj-y				+= mmu.o
 obj-y				+= time.o
 obj-y				+= grant-table.o
 obj-y				+= suspend.o
-obj-y				+= platform-pci-unplug.o
 
 obj-$(CONFIG_XEN_PVHVM)		+= enlighten_hvm.o
 obj-$(CONFIG_XEN_PVHVM)		+= mmu_hvm.o
 obj-$(CONFIG_XEN_PVHVM)		+= suspend_hvm.o
+obj-$(CONFIG_XEN_PVHVM)		+= platform-pci-unplug.o
 
 obj-$(CONFIG_XEN_PV)		+= setup.o
 obj-$(CONFIG_XEN_PV)		+= apic.o
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 33a783c77d96..3957946a6cfe 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -30,7 +30,6 @@
 #define XEN_PLATFORM_ERR_PROTOCOL -2
 #define XEN_PLATFORM_ERR_BLACKLIST -3
 
-#ifdef CONFIG_XEN_PVHVM
 /* store the value of xen_emul_unplug after the unplug is done */
 static int xen_platform_pci_unplug;
 static int xen_emul_unplug;
@@ -214,4 +213,3 @@ static int __init parse_xen_emul_unplug(char *arg)
 	return 0;
 }
 early_param("xen_emul_unplug", parse_xen_emul_unplug);
-#endif
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 02/15] xen: move pv specific parts of arch/x86/xen/mmu.c to mmu_pv.c
From: Juergen Gross @ 2018-08-28  7:40 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86, virtualization
  Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>

There are some PV specific functions in arch/x86/xen/mmu.c which can
be moved to mmu_pv.c. This in turn enables us to make multicalls.c
dependent on CONFIG_XEN_PV.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
V3:
- fix build failure on ARM (Boris Ostrovsky)
---
 arch/arm/xen/enlighten.c       |  34 --------
 arch/x86/xen/Makefile          |   2 +-
 arch/x86/xen/mmu.c             | 186 -----------------------------------------
 arch/x86/xen/mmu_pv.c          | 138 ++++++++++++++++++++++++++++++
 include/xen/interface/memory.h |   6 --
 include/xen/xen-ops.h          | 133 +++++++++++++++++++----------
 6 files changed, 227 insertions(+), 272 deletions(-)

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 07060e5b5864..17e478928276 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -62,29 +62,6 @@ static __read_mostly unsigned int xen_events_irq;
 uint32_t xen_start_flags;
 EXPORT_SYMBOL(xen_start_flags);
 
-int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t *gfn, int nr,
-			       int *err_ptr, pgprot_t prot,
-			       unsigned domid,
-			       struct page **pages)
-{
-	return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
-					 prot, domid, pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
-
-/* Not used by XENFEAT_auto_translated guests. */
-int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
-                              unsigned long addr,
-                              xen_pfn_t gfn, int nr,
-                              pgprot_t prot, unsigned domid,
-                              struct page **pages)
-{
-	return -ENOSYS;
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
-
 int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
 			       int nr, struct page **pages)
 {
@@ -92,17 +69,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
 
-/* Not used by XENFEAT_auto_translated guests. */
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t *mfn, int nr,
-			       int *err_ptr, pgprot_t prot,
-			       unsigned int domid, struct page **pages)
-{
-	return -ENOSYS;
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
-
 static void xen_read_wallclock(struct timespec64 *ts)
 {
 	u32 version;
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index f723b5aa8f74..a964f307a266 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,7 +15,6 @@ CFLAGS_enlighten_pv.o		:= $(nostackp)
 CFLAGS_mmu_pv.o			:= $(nostackp)
 
 obj-y				+= enlighten.o
-obj-y				+= multicalls.o
 obj-y				+= mmu.o
 obj-y				+= time.o
 obj-y				+= grant-table.o
@@ -34,6 +33,7 @@ obj-$(CONFIG_XEN_PV)		+= p2m.o
 obj-$(CONFIG_XEN_PV)		+= enlighten_pv.o
 obj-$(CONFIG_XEN_PV)		+= mmu_pv.o
 obj-$(CONFIG_XEN_PV)		+= irq.o
+obj-$(CONFIG_XEN_PV)		+= multicalls.o
 obj-$(CONFIG_XEN_PV)		+= xen-asm.o
 obj-$(CONFIG_XEN_PV)		+= xen-asm_$(BITS).o
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 96fc2f0fdbfe..e0e13fe16d37 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -6,12 +6,6 @@
 #include "multicalls.h"
 #include "mmu.h"
 
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and balloon lists.
- */
-DEFINE_SPINLOCK(xen_reservation_lock);
-
 unsigned long arbitrary_virt_to_mfn(void *vaddr)
 {
 	xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
@@ -42,186 +36,6 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 }
 EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
 
-static noinline void xen_flush_tlb_all(void)
-{
-	struct mmuext_op *op;
-	struct multicall_space mcs;
-
-	preempt_disable();
-
-	mcs = xen_mc_entry(sizeof(*op));
-
-	op = mcs.args;
-	op->cmd = MMUEXT_TLB_FLUSH_ALL;
-	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-
-	preempt_enable();
-}
-
-#define REMAP_BATCH_SIZE 16
-
-struct remap_data {
-	xen_pfn_t *pfn;
-	bool contiguous;
-	bool no_translate;
-	pgprot_t prot;
-	struct mmu_update *mmu_update;
-};
-
-static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
-				 unsigned long addr, void *data)
-{
-	struct remap_data *rmd = data;
-	pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
-
-	/*
-	 * If we have a contiguous range, just update the pfn itself,
-	 * else update pointer to be "next pfn".
-	 */
-	if (rmd->contiguous)
-		(*rmd->pfn)++;
-	else
-		rmd->pfn++;
-
-	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
-	rmd->mmu_update->ptr |= rmd->no_translate ?
-		MMU_PT_UPDATE_NO_TRANSLATE :
-		MMU_NORMAL_PT_UPDATE;
-	rmd->mmu_update->val = pte_val_ma(pte);
-	rmd->mmu_update++;
-
-	return 0;
-}
-
-static int do_remap_pfn(struct vm_area_struct *vma,
-			unsigned long addr,
-			xen_pfn_t *pfn, int nr,
-			int *err_ptr, pgprot_t prot,
-			unsigned int domid,
-			bool no_translate,
-			struct page **pages)
-{
-	int err = 0;
-	struct remap_data rmd;
-	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
-	unsigned long range;
-	int mapped = 0;
-
-	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
-
-	rmd.pfn = pfn;
-	rmd.prot = prot;
-	/*
-	 * We use the err_ptr to indicate if there we are doing a contiguous
-	 * mapping or a discontigious mapping.
-	 */
-	rmd.contiguous = !err_ptr;
-	rmd.no_translate = no_translate;
-
-	while (nr) {
-		int index = 0;
-		int done = 0;
-		int batch = min(REMAP_BATCH_SIZE, nr);
-		int batch_left = batch;
-		range = (unsigned long)batch << PAGE_SHIFT;
-
-		rmd.mmu_update = mmu_update;
-		err = apply_to_page_range(vma->vm_mm, addr, range,
-					  remap_area_pfn_pte_fn, &rmd);
-		if (err)
-			goto out;
-
-		/* We record the error for each page that gives an error, but
-		 * continue mapping until the whole set is done */
-		do {
-			int i;
-
-			err = HYPERVISOR_mmu_update(&mmu_update[index],
-						    batch_left, &done, domid);
-
-			/*
-			 * @err_ptr may be the same buffer as @gfn, so
-			 * only clear it after each chunk of @gfn is
-			 * used.
-			 */
-			if (err_ptr) {
-				for (i = index; i < index + done; i++)
-					err_ptr[i] = 0;
-			}
-			if (err < 0) {
-				if (!err_ptr)
-					goto out;
-				err_ptr[i] = err;
-				done++; /* Skip failed frame. */
-			} else
-				mapped += done;
-			batch_left -= done;
-			index += done;
-		} while (batch_left);
-
-		nr -= batch;
-		addr += range;
-		if (err_ptr)
-			err_ptr += batch;
-		cond_resched();
-	}
-out:
-
-	xen_flush_tlb_all();
-
-	return err < 0 ? err : mapped;
-}
-
-int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t gfn, int nr,
-			       pgprot_t prot, unsigned domid,
-			       struct page **pages)
-{
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return -EOPNOTSUPP;
-
-	return do_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false,
-			    pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
-
-int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t *gfn, int nr,
-			       int *err_ptr, pgprot_t prot,
-			       unsigned domid, struct page **pages)
-{
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
-						 prot, domid, pages);
-
-	/* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
-	 * and the consequences later is quite hard to detect what the actual
-	 * cause of "wrong memory was mapped in".
-	 */
-	BUG_ON(err_ptr == NULL);
-	return do_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid,
-			    false, pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
-
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t *mfn, int nr,
-			       int *err_ptr, pgprot_t prot,
-			       unsigned int domid, struct page **pages)
-{
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return -EOPNOTSUPP;
-
-	return do_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid,
-			    true, pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
-
 /* Returns: 0 success */
 int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
 			       int nr, struct page **pages)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 45b700ac5fe7..bdfac00b1ec8 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -98,6 +98,12 @@ static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
 #endif /* CONFIG_X86_64 */
 
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and balloon lists.
+ */
+DEFINE_SPINLOCK(xen_reservation_lock);
+
 /*
  * Note about cr3 (pagetable base) values:
  *
@@ -2665,6 +2671,138 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
+static noinline void xen_flush_tlb_all(void)
+{
+	struct mmuext_op *op;
+	struct multicall_space mcs;
+
+	preempt_disable();
+
+	mcs = xen_mc_entry(sizeof(*op));
+
+	op = mcs.args;
+	op->cmd = MMUEXT_TLB_FLUSH_ALL;
+	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
+}
+
+#define REMAP_BATCH_SIZE 16
+
+struct remap_data {
+	xen_pfn_t *pfn;
+	bool contiguous;
+	bool no_translate;
+	pgprot_t prot;
+	struct mmu_update *mmu_update;
+};
+
+static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
+				 unsigned long addr, void *data)
+{
+	struct remap_data *rmd = data;
+	pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
+
+	/*
+	 * If we have a contiguous range, just update the pfn itself,
+	 * else update pointer to be "next pfn".
+	 */
+	if (rmd->contiguous)
+		(*rmd->pfn)++;
+	else
+		rmd->pfn++;
+
+	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
+	rmd->mmu_update->ptr |= rmd->no_translate ?
+		MMU_PT_UPDATE_NO_TRANSLATE :
+		MMU_NORMAL_PT_UPDATE;
+	rmd->mmu_update->val = pte_val_ma(pte);
+	rmd->mmu_update++;
+
+	return 0;
+}
+
+int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+		  xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
+		  unsigned int domid, bool no_translate, struct page **pages)
+{
+	int err = 0;
+	struct remap_data rmd;
+	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
+	unsigned long range;
+	int mapped = 0;
+
+	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
+
+	rmd.pfn = pfn;
+	rmd.prot = prot;
+	/*
+	 * We use the err_ptr to indicate if there we are doing a contiguous
+	 * mapping or a discontigious mapping.
+	 */
+	rmd.contiguous = !err_ptr;
+	rmd.no_translate = no_translate;
+
+	while (nr) {
+		int index = 0;
+		int done = 0;
+		int batch = min(REMAP_BATCH_SIZE, nr);
+		int batch_left = batch;
+
+		range = (unsigned long)batch << PAGE_SHIFT;
+
+		rmd.mmu_update = mmu_update;
+		err = apply_to_page_range(vma->vm_mm, addr, range,
+					  remap_area_pfn_pte_fn, &rmd);
+		if (err)
+			goto out;
+
+		/*
+		 * We record the error for each page that gives an error, but
+		 * continue mapping until the whole set is done
+		 */
+		do {
+			int i;
+
+			err = HYPERVISOR_mmu_update(&mmu_update[index],
+						    batch_left, &done, domid);
+
+			/*
+			 * @err_ptr may be the same buffer as @gfn, so
+			 * only clear it after each chunk of @gfn is
+			 * used.
+			 */
+			if (err_ptr) {
+				for (i = index; i < index + done; i++)
+					err_ptr[i] = 0;
+			}
+			if (err < 0) {
+				if (!err_ptr)
+					goto out;
+				err_ptr[i] = err;
+				done++; /* Skip failed frame. */
+			} else
+				mapped += done;
+			batch_left -= done;
+			index += done;
+		} while (batch_left);
+
+		nr -= batch;
+		addr += range;
+		if (err_ptr)
+			err_ptr += batch;
+		cond_resched();
+	}
+out:
+
+	xen_flush_tlb_all();
+
+	return err < 0 ? err : mapped;
+}
+EXPORT_SYMBOL_GPL(xen_remap_pfn);
+
 #ifdef CONFIG_KEXEC_CORE
 phys_addr_t paddr_vmcoreinfo_note(void)
 {
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index 4c5751c26f87..447004861f00 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -244,12 +244,6 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
 #define XENMEM_machine_memory_map   10
 
 
-/*
- * Prevent the balloon driver from changing the memory reservation
- * during a driver critical region.
- */
-extern spinlock_t xen_reservation_lock;
-
 /*
  * Unmaps the page appearing at a particular GPFN from the specified guest's
  * pseudophysical address space.
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index fd18c974a619..18803ff76e27 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,6 +5,7 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/efi.h>
+#include <xen/features.h>
 #include <asm/xen/interface.h>
 #include <xen/interface/vcpu.h>
 
@@ -47,6 +48,10 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 				dma_addr_t *dma_handle);
 
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
+
+int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+		  xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
+		  unsigned int domid, bool no_translate, struct page **pages);
 #else
 static inline int xen_create_contiguous_region(phys_addr_t pstart,
 					       unsigned int order,
@@ -58,10 +63,50 @@ static inline int xen_create_contiguous_region(phys_addr_t pstart,
 
 static inline void xen_destroy_contiguous_region(phys_addr_t pstart,
 						 unsigned int order) { }
+
+static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+				xen_pfn_t *pfn, int nr, int *err_ptr,
+				pgprot_t prot,  unsigned int domid,
+				bool no_translate, struct page **pages)
+{
+	BUG();
+	return 0;
+}
 #endif
 
 struct vm_area_struct;
 
+#ifdef CONFIG_XEN_AUTO_XLATE
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
+			      unsigned long addr,
+			      xen_pfn_t *gfn, int nr,
+			      int *err_ptr, pgprot_t prot,
+			      unsigned int domid,
+			      struct page **pages);
+int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
+			      int nr, struct page **pages);
+#else
+/*
+ * These two functions are called from arch/x86/xen/mmu.c and so stubs
+ * are needed for a configuration not specifying CONFIG_XEN_AUTO_XLATE.
+ */
+static inline int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
+					    unsigned long addr,
+					    xen_pfn_t *gfn, int nr,
+					    int *err_ptr, pgprot_t prot,
+					    unsigned int domid,
+					    struct page **pages)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
+					    int nr, struct page **pages)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 /*
  * xen_remap_domain_gfn_array() - map an array of foreign frames by gfn
  * @vma:     VMA to map the pages into
@@ -79,12 +124,25 @@ struct vm_area_struct;
  * Returns the number of successfully mapped frames, or a -ve error
  * code.
  */
-int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t *gfn, int nr,
-			       int *err_ptr, pgprot_t prot,
-			       unsigned domid,
-			       struct page **pages);
+static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
+					     unsigned long addr,
+					     xen_pfn_t *gfn, int nr,
+					     int *err_ptr, pgprot_t prot,
+					     unsigned int domid,
+					     struct page **pages)
+{
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
+						 prot, domid, pages);
+
+	/* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
+	 * and the consequences later is quite hard to detect what the actual
+	 * cause of "wrong memory was mapped in".
+	 */
+	BUG_ON(err_ptr == NULL);
+	return xen_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid,
+			     false, pages);
+}
 
 /*
  * xen_remap_domain_mfn_array() - map an array of foreign frames by mfn
@@ -103,10 +161,18 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
  * Returns the number of successfully mapped frames, or a -ve error
  * code.
  */
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
-			       unsigned long addr, xen_pfn_t *mfn, int nr,
-			       int *err_ptr, pgprot_t prot,
-			       unsigned int domid, struct page **pages);
+static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+					     unsigned long addr, xen_pfn_t *mfn,
+					     int nr, int *err_ptr,
+					     pgprot_t prot, unsigned int domid,
+					     struct page **pages)
+{
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -EOPNOTSUPP;
+
+	return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid,
+			     true, pages);
+}
 
 /* xen_remap_domain_gfn_range() - map a range of foreign frames
  * @vma:     VMA to map the pages into
@@ -120,44 +186,21 @@ int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
  * Returns the number of successfully mapped frames, or a -ve error
  * code.
  */
-int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t gfn, int nr,
-			       pgprot_t prot, unsigned domid,
-			       struct page **pages);
-int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
-			       int numpgs, struct page **pages);
-
-#ifdef CONFIG_XEN_AUTO_XLATE
-int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
-			      unsigned long addr,
-			      xen_pfn_t *gfn, int nr,
-			      int *err_ptr, pgprot_t prot,
-			      unsigned domid,
-			      struct page **pages);
-int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
-			      int nr, struct page **pages);
-#else
-/*
- * These two functions are called from arch/x86/xen/mmu.c and so stubs
- * are needed for a configuration not specifying CONFIG_XEN_AUTO_XLATE.
- */
-static inline int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
-					    unsigned long addr,
-					    xen_pfn_t *gfn, int nr,
-					    int *err_ptr, pgprot_t prot,
-					    unsigned int domid,
-					    struct page **pages)
+static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
+					     unsigned long addr,
+					     xen_pfn_t gfn, int nr,
+					     pgprot_t prot, unsigned int domid,
+					     struct page **pages)
 {
-	return -EOPNOTSUPP;
-}
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -EOPNOTSUPP;
 
-static inline int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
-					    int nr, struct page **pages)
-{
-	return -EOPNOTSUPP;
+	return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false,
+			     pages);
 }
-#endif
+
+int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
+			       int numpgs, struct page **pages);
 
 int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr,
 				  unsigned long nr_grant_frames);
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 01/15] xen: move pv irq related functions under CONFIG_XEN_PV umbrella
From: Juergen Gross @ 2018-08-28  7:40 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86, virtualization
  Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx
In-Reply-To: <20180828074026.820-1-jgross@suse.com>

All functions in arch/x86/xen/irq.c and arch/x86/xen/xen-asm*.S are
specific to PV guests. Include them in the kernel with
CONFIG_XEN_PV only.

Make the PV specific code in arch/x86/entry/entry_*.S dependent on
CONFIG_XEN_PV instead of CONFIG_XEN.

The HVM specific code should depend on CONFIG_XEN_PVHVM.

While at it reformat the Makefile to make it more readable.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/entry/entry_32.S |  8 +++++---
 arch/x86/entry/entry_64.S |  8 +++++---
 arch/x86/xen/Makefile     | 41 +++++++++++++++++++++++++++++++----------
 include/xen/events.h      |  2 ++
 4 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52c..9cc4c3064ce0 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -782,7 +782,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
  * will ignore all of the single-step traps generated in this range.
  */
 
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
 /*
  * Xen doesn't set %esp to be precisely what the normal SYSENTER
  * entry point expects, so fix it up before using the normal path.
@@ -1240,7 +1240,7 @@ ENTRY(spurious_interrupt_bug)
 	jmp	common_exception
 END(spurious_interrupt_bug)
 
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
 ENTRY(xen_hypervisor_callback)
 	pushl	$-1				/* orig_ax = -1 => not a system call */
 	SAVE_ALL
@@ -1321,11 +1321,13 @@ ENTRY(xen_failsafe_callback)
 	_ASM_EXTABLE(3b, 8b)
 	_ASM_EXTABLE(4b, 9b)
 ENDPROC(xen_failsafe_callback)
+#endif /* CONFIG_XEN_PV */
 
+#ifdef CONFIG_XEN_PVHVM
 BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
 		 xen_evtchn_do_upcall)
+#endif
 
-#endif /* CONFIG_XEN */
 
 #if IS_ENABLED(CONFIG_HYPERV)
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693ecc..a9ec5d3c6e67 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1050,7 +1050,7 @@ ENTRY(do_softirq_own_stack)
 	ret
 ENDPROC(do_softirq_own_stack)
 
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
 idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
 
 /*
@@ -1130,11 +1130,13 @@ ENTRY(xen_failsafe_callback)
 	ENCODE_FRAME_POINTER
 	jmp	error_exit
 END(xen_failsafe_callback)
+#endif /* CONFIG_XEN_PV */
 
+#ifdef CONFIG_XEN_PVHVM
 apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 	xen_hvm_callback_vector xen_evtchn_do_upcall
+#endif
 
-#endif /* CONFIG_XEN */
 
 #if IS_ENABLED(CONFIG_HYPERV)
 apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
@@ -1151,7 +1153,7 @@ idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
 idtentry int3			do_int3			has_error_code=0
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
 idtentry xennmi			do_nmi			has_error_code=0
 idtentry xendebug		do_debug		has_error_code=0
 idtentry xenint3		do_int3			has_error_code=0
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index d83cb5478f54..f723b5aa8f74 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -12,25 +12,46 @@ endif
 # Make sure early boot has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_enlighten_pv.o		:= $(nostackp)
-CFLAGS_mmu_pv.o		:= $(nostackp)
+CFLAGS_mmu_pv.o			:= $(nostackp)
 
-obj-y		:= enlighten.o multicalls.o mmu.o irq.o \
-			time.o xen-asm.o xen-asm_$(BITS).o \
-			grant-table.o suspend.o platform-pci-unplug.o
+obj-y				+= enlighten.o
+obj-y				+= multicalls.o
+obj-y				+= mmu.o
+obj-y				+= time.o
+obj-y				+= grant-table.o
+obj-y				+= suspend.o
+obj-y				+= platform-pci-unplug.o
 
-obj-$(CONFIG_XEN_PVHVM)		+= enlighten_hvm.o mmu_hvm.o suspend_hvm.o
-obj-$(CONFIG_XEN_PV)			+= setup.o apic.o pmu.o suspend_pv.o \
-						p2m.o enlighten_pv.o mmu_pv.o
-obj-$(CONFIG_XEN_PVH)			+= enlighten_pvh.o
+obj-$(CONFIG_XEN_PVHVM)		+= enlighten_hvm.o
+obj-$(CONFIG_XEN_PVHVM)		+= mmu_hvm.o
+obj-$(CONFIG_XEN_PVHVM)		+= suspend_hvm.o
 
-obj-$(CONFIG_EVENT_TRACING) += trace.o
+obj-$(CONFIG_XEN_PV)		+= setup.o
+obj-$(CONFIG_XEN_PV)		+= apic.o
+obj-$(CONFIG_XEN_PV)		+= pmu.o
+obj-$(CONFIG_XEN_PV)		+= suspend_pv.o
+obj-$(CONFIG_XEN_PV)		+= p2m.o
+obj-$(CONFIG_XEN_PV)		+= enlighten_pv.o
+obj-$(CONFIG_XEN_PV)		+= mmu_pv.o
+obj-$(CONFIG_XEN_PV)		+= irq.o
+obj-$(CONFIG_XEN_PV)		+= xen-asm.o
+obj-$(CONFIG_XEN_PV)		+= xen-asm_$(BITS).o
+
+obj-$(CONFIG_XEN_PVH)		+= enlighten_pvh.o
+obj-$(CONFIG_XEN_PVH)	 	+= xen-pvh.o
+
+obj-$(CONFIG_EVENT_TRACING)	+= trace.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_XEN_PV_SMP)  	+= smp_pv.o
 obj-$(CONFIG_XEN_PVHVM_SMP)  	+= smp_hvm.o
+
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
+
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
+
 obj-$(CONFIG_XEN_DOM0)		+= vga.o
+
 obj-$(CONFIG_SWIOTLB_XEN)	+= pci-swiotlb-xen.o
+
 obj-$(CONFIG_XEN_EFI)		+= efi.o
-obj-$(CONFIG_XEN_PVH)	 	+= xen-pvh.o
diff --git a/include/xen/events.h b/include/xen/events.h
index c3e6bc643a7b..a48897199975 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -89,11 +89,13 @@ unsigned irq_from_evtchn(unsigned int evtchn);
 int irq_from_virq(unsigned int cpu, unsigned int virq);
 unsigned int evtchn_from_irq(unsigned irq);
 
+#ifdef CONFIG_XEN_PVHVM
 /* Xen HVM evtchn vector callback */
 void xen_hvm_callback_vector(void);
 #ifdef CONFIG_TRACING
 #define trace_xen_hvm_callback_vector xen_hvm_callback_vector
 #endif
+#endif
 int xen_set_callback_via(uint64_t via);
 void xen_evtchn_do_upcall(struct pt_regs *regs);
 void xen_hvm_evtchn_do_upcall(void);
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 00/15] x86/paravirt, xen: several cleanups
From: Juergen Gross @ 2018-08-28  7:40 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86, virtualization
  Cc: Juergen Gross, boris.ostrovsky, rusty, mingo, hpa, akataria, tglx

This series removes some no longer needed stuff from paravirt
infrastructure and puts large quantities of paravirt ops under a new
config option PARAVIRT_XXL which is selected by XEN_PV only.

Some Xen related cleanups:
- move some pv-only code from CONFIG_XEN to CONFIG_XEN_PV
- use CONFIG_XEN_PVHVM in Makefile instead of #ifdef around a complete source
- add SPDX identifier where missing

A pvops kernel without XEN_PV being configured is about 2.5% smaller
with this series applied.

Changes in V3:
- merged with the Xen cleanup series which is a prerequisite for the
  pv cleanup
- fix build errors on ARM
- let VSMP select PARAVIRT_XXL, put all irq_ops under PARAVIRT_XXL

Changes in V2:
- patch 4: shorten pv_ops sub-structure names (Jan Beulich)
- patch 11: new patch

Juergen Gross (15):
  xen: move pv irq related functions under CONFIG_XEN_PV umbrella
  xen: move pv specific parts of arch/x86/xen/mmu.c to mmu_pv.c
  xen: link platform-pci-unplug.o only if CONFIG_XEN_PVHVM
  xen: add SPDX identifier in arch/x86/xen files
  x86/paravirt: make paravirt_patch_call() and paravirt_patch_jmp()
    static
  x86/paravirt: remove clobbers parameter from paravirt patch functions
  x86/paravirt: remove clobbers from struct paravirt_patch_site
  x86/paravirt: use a single ops structure
  x86/paravirt: remove unused paravirt bits
  x86/paravirt: introduce new config option PARAVIRT_XXL
  x86/paravirt: move items in pv_info under PARAVIRT_XXL umbrella
  x86/paravirt: move the Xen-only pv_cpu_ops under the PARAVIRT_XXL
    umbrella
  x86/paravirt: move the pv_irq_ops under the PARAVIRT_XXL umbrella
  x86/paravirt: move the Xen-only pv_mmu_ops under the PARAVIRT_XXL
    umbrella
  x86/paravirt: remove unneeded mmu related paravirt ops bits

 arch/arm/include/asm/paravirt.h             |   9 +-
 arch/arm/kernel/paravirt.c                  |   4 +-
 arch/arm/xen/enlighten.c                    |  34 ---
 arch/arm64/include/asm/paravirt.h           |   9 +-
 arch/arm64/kernel/paravirt.c                |   4 +-
 arch/x86/Kconfig                            |   4 +
 arch/x86/boot/compressed/misc.h             |   1 +
 arch/x86/entry/entry_32.S                   |   8 +-
 arch/x86/entry/entry_64.S                   |   8 +-
 arch/x86/hyperv/mmu.c                       |   4 +-
 arch/x86/include/asm/debugreg.h             |   2 +-
 arch/x86/include/asm/desc.h                 |   4 +-
 arch/x86/include/asm/fixmap.h               |   2 +-
 arch/x86/include/asm/irqflags.h             |  16 +-
 arch/x86/include/asm/mmu_context.h          |   4 +-
 arch/x86/include/asm/msr.h                  |   4 +-
 arch/x86/include/asm/paravirt.h             | 415 +++++++++++++---------------
 arch/x86/include/asm/paravirt_types.h       |  82 +++---
 arch/x86/include/asm/pgalloc.h              |   2 +-
 arch/x86/include/asm/pgtable-3level_types.h |   2 +-
 arch/x86/include/asm/pgtable.h              |   7 +-
 arch/x86/include/asm/processor.h            |   4 +-
 arch/x86/include/asm/ptrace.h               |   2 +-
 arch/x86/include/asm/segment.h              |   2 +-
 arch/x86/include/asm/special_insns.h        |   4 +-
 arch/x86/kernel/alternative.c               |   2 +-
 arch/x86/kernel/asm-offsets.c               |  13 +-
 arch/x86/kernel/asm-offsets_64.c            |   9 +-
 arch/x86/kernel/cpu/common.c                |   4 +-
 arch/x86/kernel/cpu/vmware.c                |   4 +-
 arch/x86/kernel/head_64.S                   |   2 +-
 arch/x86/kernel/kvm.c                       |  19 +-
 arch/x86/kernel/kvmclock.c                  |   4 +-
 arch/x86/kernel/paravirt-spinlocks.c        |  15 +-
 arch/x86/kernel/paravirt.c                  | 305 ++++++++++----------
 arch/x86/kernel/paravirt_patch_32.c         |  57 ++--
 arch/x86/kernel/paravirt_patch_64.c         |  65 ++---
 arch/x86/kernel/tsc.c                       |   2 +-
 arch/x86/kernel/vsmp_64.c                   |  26 +-
 arch/x86/mm/mem_encrypt_identity.c          |   1 +
 arch/x86/xen/Kconfig                        |   1 +
 arch/x86/xen/Makefile                       |  41 ++-
 arch/x86/xen/efi.c                          |  14 +-
 arch/x86/xen/enlighten.c                    |   2 +
 arch/x86/xen/enlighten_hvm.c                |   2 +
 arch/x86/xen/enlighten_pv.c                 |  31 ++-
 arch/x86/xen/grant-table.c                  |  25 +-
 arch/x86/xen/irq.c                          |   2 +-
 arch/x86/xen/mmu.c                          | 188 +------------
 arch/x86/xen/mmu_hvm.c                      |   2 +-
 arch/x86/xen/mmu_pv.c                       | 168 ++++++++++-
 arch/x86/xen/p2m.c                          |   2 +
 arch/x86/xen/pci-swiotlb-xen.c              |   2 +
 arch/x86/xen/platform-pci-unplug.c          |  18 +-
 arch/x86/xen/spinlock.c                     |  11 +-
 arch/x86/xen/time.c                         |   4 +-
 arch/x86/xen/vdso.h                         |   2 +
 arch/x86/xen/xen-pvh.S                      |  15 +-
 drivers/xen/time.c                          |   2 +-
 include/xen/events.h                        |   2 +
 include/xen/interface/memory.h              |   6 -
 include/xen/xen-ops.h                       | 133 ++++++---
 62 files changed, 880 insertions(+), 958 deletions(-)

-- 
2.16.4

^ permalink raw reply

* Re: [PATCH net-next v2 0/5] virtio: support packed ring
From: Michael S. Tsirkin @ 2018-08-27 14:00 UTC (permalink / raw)
  To: Tiwei Bie; +Cc: virtio-dev, netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180711022711.7090-1-tiwei.bie@intel.com>

Are there still plans to test the performance with vost pmd?
vhost doesn't seem to show a performance gain ...


On Wed, Jul 11, 2018 at 10:27:06AM +0800, Tiwei Bie wrote:
> Hello everyone,
> 
> This patch set implements packed ring support in virtio driver.
> 
> Some functional tests have been done with Jason's
> packed ring implementation in vhost:
> 
> https://lkml.org/lkml/2018/7/3/33
> 
> Both of ping and netperf worked as expected.
> 
> v1 -> v2:
> - Use READ_ONCE() to read event off_wrap and flags together (Jason);
> - Add comments related to ccw (Jason);
> 
> RFC (v6) -> v1:
> - Avoid extra virtio_wmb() in virtqueue_enable_cb_delayed_packed()
>   when event idx is off (Jason);
> - Fix bufs calculation in virtqueue_enable_cb_delayed_packed() (Jason);
> - Test the state of the desc at used_idx instead of last_used_idx
>   in virtqueue_enable_cb_delayed_packed() (Jason);
> - Save wrap counter (as part of queue state) in the return value
>   of virtqueue_enable_cb_prepare_packed();
> - Refine the packed ring definitions in uapi;
> - Rebase on the net-next tree;
> 
> RFC v5 -> RFC v6:
> - Avoid tracking addr/len/flags when DMA API isn't used (MST/Jason);
> - Define wrap counter as bool (Jason);
> - Use ALIGN() in vring_init_packed() (Jason);
> - Avoid using pointer to track `next` in detach_buf_packed() (Jason);
> - Add comments for barriers (Jason);
> - Don't enable RING_PACKED on ccw for now (noticed by Jason);
> - Refine the memory barrier in virtqueue_poll();
> - Add a missing memory barrier in virtqueue_enable_cb_delayed_packed();
> - Remove the hacks in virtqueue_enable_cb_prepare_packed();
> 
> RFC v4 -> RFC v5:
> - Save DMA addr, etc in desc state (Jason);
> - Track used wrap counter;
> 
> RFC v3 -> RFC v4:
> - Make ID allocation support out-of-order (Jason);
> - Various fixes for EVENT_IDX support;
> 
> RFC v2 -> RFC v3:
> - Split into small patches (Jason);
> - Add helper virtqueue_use_indirect() (Jason);
> - Just set id for the last descriptor of a list (Jason);
> - Calculate the prev in virtqueue_add_packed() (Jason);
> - Fix/improve desc suppression code (Jason/MST);
> - Refine the code layout for XXX_split/packed and wrappers (MST);
> - Fix the comments and API in uapi (MST);
> - Remove the BUG_ON() for indirect (Jason);
> - Some other refinements and bug fixes;
> 
> RFC v1 -> RFC v2:
> - Add indirect descriptor support - compile test only;
> - Add event suppression supprt - compile test only;
> - Move vring_packed_init() out of uapi (Jason, MST);
> - Merge two loops into one in virtqueue_add_packed() (Jason);
> - Split vring_unmap_one() for packed ring and split ring (Jason);
> - Avoid using '%' operator (Jason);
> - Rename free_head -> next_avail_idx (Jason);
> - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> - Some other refinements and bug fixes;
> 
> Thanks!
> 
> Tiwei Bie (5):
>   virtio: add packed ring definitions
>   virtio_ring: support creating packed ring
>   virtio_ring: add packed ring support
>   virtio_ring: add event idx support in packed ring
>   virtio_ring: enable packed ring
> 
>  drivers/s390/virtio/virtio_ccw.c   |   14 +
>  drivers/virtio/virtio_ring.c       | 1365 ++++++++++++++++++++++------
>  include/linux/virtio_ring.h        |    8 +-
>  include/uapi/linux/virtio_config.h |    3 +
>  include/uapi/linux/virtio_ring.h   |   43 +
>  5 files changed, 1157 insertions(+), 276 deletions(-)
> 
> -- 
> 2.18.0

^ permalink raw reply

* [PATCH v37 3/3] virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
From: Wei Wang @ 2018-08-27  1:32 UTC (permalink / raw)
  To: virtio-dev, linux-kernel, virtualization, kvm, linux-mm, mst,
	mhocko, akpm, dgilbert
  Cc: yang.zhang.wz, riel, quan.xu0, liliang.opensource, pbonzini,
	nilal, torvalds
In-Reply-To: <1535333539-32420-1-git-send-email-wei.w.wang@intel.com>

The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the
guest is using page poisoning. Guest writes to the poison_val config
field to tell host about the page poisoning value that is in use.

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/virtio/virtio_balloon.c     | 10 ++++++++++
 include/uapi/linux/virtio_balloon.h |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index a185678..728ecd1 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -825,6 +825,7 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
+	__u32 poison_val;
 	int err;
 
 	if (!vdev->config->get) {
@@ -892,6 +893,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
 		vb->num_free_page_blocks = 0;
 		spin_lock_init(&vb->free_page_list_lock);
 		INIT_LIST_HEAD(&vb->free_page_list);
+		if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+			memset(&poison_val, PAGE_POISON, sizeof(poison_val));
+			virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+				      poison_val, &poison_val);
+		}
 	}
 	/*
 	 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
@@ -992,6 +998,9 @@ static int virtballoon_restore(struct virtio_device *vdev)
 
 static int virtballoon_validate(struct virtio_device *vdev)
 {
+	if (!page_poisoning_enabled())
+		__virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+
 	__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
 	return 0;
 }
@@ -1001,6 +1010,7 @@ static unsigned int features[] = {
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
 	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
+	VIRTIO_BALLOON_F_PAGE_POISON,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 47c9eb4..a1966cd7 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -35,6 +35,7 @@
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
 #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
+#define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -48,6 +49,8 @@ struct virtio_balloon_config {
 	__u32 actual;
 	/* Free page report command id, readonly by guest */
 	__u32 free_page_report_cmd_id;
+	/* Stores PAGE_POISON if page poisoning is in use */
+	__u32 poison_val;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
-- 
2.7.4

^ permalink raw reply related

* [PATCH v37 2/3] mm/page_poison: expose page_poisoning_enabled to kernel modules
From: Wei Wang @ 2018-08-27  1:32 UTC (permalink / raw)
  To: virtio-dev, linux-kernel, virtualization, kvm, linux-mm, mst,
	mhocko, akpm, dgilbert
  Cc: yang.zhang.wz, riel, quan.xu0, liliang.opensource, pbonzini,
	nilal, torvalds
In-Reply-To: <1535333539-32420-1-git-send-email-wei.w.wang@intel.com>

In some usages, e.g. virtio-balloon, a kernel module needs to know if
page poisoning is in use. This patch exposes the page_poisoning_enabled
function to kernel modules.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_poison.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mm/page_poison.c b/mm/page_poison.c
index aa2b3d3..830f604 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf)
 }
 early_param("page_poison", early_page_poison_param);
 
+/**
+ * page_poisoning_enabled - check if page poisoning is enabled
+ *
+ * Return true if page poisoning is enabled, or false if not.
+ */
 bool page_poisoning_enabled(void)
 {
 	/*
@@ -29,6 +34,7 @@ bool page_poisoning_enabled(void)
 		(!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
 		debug_pagealloc_enabled()));
 }
+EXPORT_SYMBOL_GPL(page_poisoning_enabled);
 
 static void poison_page(struct page *page)
 {
-- 
2.7.4

^ permalink raw reply related

* [PATCH v37 1/3] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
From: Wei Wang @ 2018-08-27  1:32 UTC (permalink / raw)
  To: virtio-dev, linux-kernel, virtualization, kvm, linux-mm, mst,
	mhocko, akpm, dgilbert
  Cc: yang.zhang.wz, riel, quan.xu0, liliang.opensource, pbonzini,
	nilal, torvalds
In-Reply-To: <1535333539-32420-1-git-send-email-wei.w.wang@intel.com>

Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_HINT feature indicates the
support of reporting hints of guest free pages to host via virtio-balloon.
Currenlty, only free page blocks of MAX_ORDER - 1 are reported. They are
obtained one by one from the mm free list via the regular allocation
function.

Host requests the guest to report free page hints by sending a new cmd id
to the guest via the free_page_report_cmd_id configuration register. When
the guest starts to report, it first sends a start cmd to host via the
free page vq, which acks to host the cmd id received. When the guest
finishes reporting free pages, a stop cmd is sent to host via the vq.
Host may also send a stop cmd id to the guest to stop the reporting.

VIRTIO_BALLOON_CMD_ID_STOP: Host sends this cmd to stop the guest
reporting.
VIRTIO_BALLOON_CMD_ID_DONE: Host sends this cmd to tell the guest that
the reported pages are ready to be freed.

Why does the guest free the reported pages when host tells it is ready to
free?
This is because freeing pages appears to be expensive for live migration.
free_pages() dirties memory very quickly and makes the live migraion not
converge in some cases. So it is good to delay the free_page operation
when the migration is done, and host sends a command to guest about that.

Why do we need the new VIRTIO_BALLOON_CMD_ID_DONE, instead of reusing
VIRTIO_BALLOON_CMD_ID_STOP?
This is because live migration is usually done in several rounds. At the
end of each round, host needs to send a VIRTIO_BALLOON_CMD_ID_STOP cmd to
the guest to stop (or say pause) the reporting. The guest resumes the
reporting when it receives a new command id at the beginning of the next
round. So we need a new cmd id to distinguish between "stop reporting" and
"ready to free the reported pages".

TODO:
- Add a batch page allocation API to amortize the allocation overhead.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Liang Li <liang.z.li@intel.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/virtio/virtio_balloon.c     | 364 ++++++++++++++++++++++++++++++++----
 include/uapi/linux/virtio_balloon.h |   5 +
 2 files changed, 336 insertions(+), 33 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index d1c1f62..a185678 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -41,13 +41,34 @@
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
+#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
+					     __GFP_NOMEMALLOC)
+/* The order of free page blocks to report to host */
+#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
+/* The size of a free page block in bytes */
+#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
+	(1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
+
 #ifdef CONFIG_BALLOON_COMPACTION
 static struct vfsmount *balloon_mnt;
 #endif
 
+enum virtio_balloon_vq {
+	VIRTIO_BALLOON_VQ_INFLATE,
+	VIRTIO_BALLOON_VQ_DEFLATE,
+	VIRTIO_BALLOON_VQ_STATS,
+	VIRTIO_BALLOON_VQ_FREE_PAGE,
+	VIRTIO_BALLOON_VQ_MAX
+};
+
 struct virtio_balloon {
 	struct virtio_device *vdev;
-	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
+
+	/* Balloon's own wq for cpu-intensive work items */
+	struct workqueue_struct *balloon_wq;
+	/* The free page reporting work item submitted to the balloon wq */
+	struct work_struct report_free_page_work;
 
 	/* The balloon servicing is delegated to a freezable workqueue. */
 	struct work_struct update_balloon_stats_work;
@@ -57,6 +78,18 @@ struct virtio_balloon {
 	spinlock_t stop_update_lock;
 	bool stop_update;
 
+	/* The list of allocated free pages, waiting to be given back to mm */
+	struct list_head free_page_list;
+	spinlock_t free_page_list_lock;
+	/* The number of free page blocks on the above list */
+	unsigned long num_free_page_blocks;
+	/* The cmd id received from host */
+	u32 cmd_id_received;
+	/* The cmd id that is actively in use */
+	__virtio32 cmd_id_active;
+	/* Buffer to store the stop sign */
+	__virtio32 cmd_id_stop;
+
 	/* Waiting for host to ack the pages we released. */
 	wait_queue_head_t acked;
 
@@ -320,17 +353,6 @@ static void stats_handle_request(struct virtio_balloon *vb)
 	virtqueue_kick(vq);
 }
 
-static void virtballoon_changed(struct virtio_device *vdev)
-{
-	struct virtio_balloon *vb = vdev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vb->stop_update_lock, flags);
-	if (!vb->stop_update)
-		queue_work(system_freezable_wq, &vb->update_balloon_size_work);
-	spin_unlock_irqrestore(&vb->stop_update_lock, flags);
-}
-
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
 	s64 target;
@@ -347,6 +369,60 @@ static inline s64 towards_target(struct virtio_balloon *vb)
 	return target - vb->num_pages;
 }
 
+/* Gives back @num_to_return blocks of free pages to mm. */
+static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
+					     unsigned long num_to_return)
+{
+	struct page *page;
+	unsigned long num_returned;
+
+	spin_lock_irq(&vb->free_page_list_lock);
+	for (num_returned = 0; num_returned < num_to_return; num_returned++) {
+		page = balloon_page_pop(&vb->free_page_list);
+		if (!page)
+			break;
+		free_pages((unsigned long)page_address(page),
+			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	}
+	vb->num_free_page_blocks -= num_returned;
+	spin_unlock_irq(&vb->free_page_list_lock);
+
+	return num_returned;
+}
+
+static void virtballoon_changed(struct virtio_device *vdev)
+{
+	struct virtio_balloon *vb = vdev->priv;
+	unsigned long flags;
+	s64 diff = towards_target(vb);
+
+	if (diff) {
+		spin_lock_irqsave(&vb->stop_update_lock, flags);
+		if (!vb->stop_update)
+			queue_work(system_freezable_wq,
+				   &vb->update_balloon_size_work);
+		spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		virtio_cread(vdev, struct virtio_balloon_config,
+			     free_page_report_cmd_id, &vb->cmd_id_received);
+		if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
+			/* Pass ULONG_MAX to give back all the free pages */
+			return_free_pages_to_mm(vb, ULONG_MAX);
+		} else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
+			   vb->cmd_id_received !=
+			   virtio32_to_cpu(vdev, vb->cmd_id_active)) {
+			spin_lock_irqsave(&vb->stop_update_lock, flags);
+			if (!vb->stop_update) {
+				queue_work(vb->balloon_wq,
+					   &vb->report_free_page_work);
+			}
+			spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+		}
+	}
+}
+
 static void update_balloon_size(struct virtio_balloon *vb)
 {
 	u32 actual = vb->num_pages;
@@ -389,26 +465,44 @@ static void update_balloon_size_func(struct work_struct *work)
 
 static int init_vqs(struct virtio_balloon *vb)
 {
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
-	static const char * const names[] = { "inflate", "deflate", "stats" };
-	int err, nvqs;
+	struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
+	vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
+	const char *names[VIRTIO_BALLOON_VQ_MAX];
+	int err;
 
 	/*
-	 * We expect two virtqueues: inflate and deflate, and
-	 * optionally stat.
+	 * Inflateq and deflateq are used unconditionally. The names[]
+	 * will be NULL if the related feature is not enabled, which will
+	 * cause no allocation for the corresponding virtqueue in find_vqs.
 	 */
-	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
+	callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
+	names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
+	callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
+	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
+	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
+	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
+		callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
+	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
+		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+	}
+
+	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
+					 vqs, callbacks, names, NULL, NULL);
 	if (err)
 		return err;
 
-	vb->inflate_vq = vqs[0];
-	vb->deflate_vq = vqs[1];
+	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
+	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
 		struct scatterlist sg;
 		unsigned int num_stats;
-		vb->stats_vq = vqs[2];
+		vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
 
 		/*
 		 * Prime this virtqueue with one buffer so the hypervisor can
@@ -426,9 +520,145 @@ static int init_vqs(struct virtio_balloon *vb)
 		}
 		virtqueue_kick(vb->stats_vq);
 	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
+
+	return 0;
+}
+
+static int send_cmd_id_start(struct virtio_balloon *vb)
+{
+	struct scatterlist sg;
+	struct virtqueue *vq = vb->free_page_vq;
+	int err, unused;
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received);
+	sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
+	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
+	if (!err)
+		virtqueue_kick(vq);
+	return err;
+}
+
+static int send_cmd_id_stop(struct virtio_balloon *vb)
+{
+	struct scatterlist sg;
+	struct virtqueue *vq = vb->free_page_vq;
+	int err, unused;
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
+	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
+	if (!err)
+		virtqueue_kick(vq);
+	return err;
+}
+
+static int get_free_page_and_send(struct virtio_balloon *vb)
+{
+	struct virtqueue *vq = vb->free_page_vq;
+	struct page *page;
+	struct scatterlist sg;
+	int err, unused;
+	void *p;
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
+			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	/*
+	 * When the allocation returns NULL, it indicates that we have got all
+	 * the possible free pages, so return -EINTR to stop.
+	 */
+	if (!page)
+		return -EINTR;
+
+	p = page_address(page);
+	sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE);
+	/* There is always 1 entry reserved for the cmd id to use. */
+	if (vq->num_free > 1) {
+		err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
+		if (unlikely(err)) {
+			free_pages((unsigned long)p,
+				   VIRTIO_BALLOON_FREE_PAGE_ORDER);
+			return err;
+		}
+		virtqueue_kick(vq);
+		spin_lock_irq(&vb->free_page_list_lock);
+		balloon_page_push(&vb->free_page_list, page);
+		vb->num_free_page_blocks++;
+		spin_unlock_irq(&vb->free_page_list_lock);
+	} else {
+		/*
+		 * The vq has no available entry to add this page block, so
+		 * just free it.
+		 */
+		free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	}
+
 	return 0;
 }
 
+static int send_free_pages(struct virtio_balloon *vb)
+{
+	int err;
+	u32 cmd_id_active;
+
+	while (1) {
+		/*
+		 * If a stop id or a new cmd id was just received from host,
+		 * stop the reporting.
+		 */
+		cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
+		if (cmd_id_active != vb->cmd_id_received)
+			break;
+
+		/*
+		 * The free page blocks are allocated and sent to host one by
+		 * one.
+		 */
+		err = get_free_page_and_send(vb);
+		if (err == -EINTR)
+			break;
+		else if (unlikely(err))
+			return err;
+	}
+
+	return 0;
+}
+
+static void report_free_page_func(struct work_struct *work)
+{
+	int err;
+	struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
+						 report_free_page_work);
+	struct device *dev = &vb->vdev->dev;
+
+	/* Start by sending the received cmd id to host with an outbuf. */
+	err = send_cmd_id_start(vb);
+	if (unlikely(err))
+		dev_err(dev, "Failed to send a start id, err = %d\n", err);
+
+	err = send_free_pages(vb);
+	if (unlikely(err))
+		dev_err(dev, "Failed to send a free page, err = %d\n", err);
+
+	/* End by sending a stop id to host with an outbuf. */
+	err = send_cmd_id_stop(vb);
+	if (unlikely(err))
+		dev_err(dev, "Failed to send a stop id, err = %d\n", err);
+}
+
 #ifdef CONFIG_BALLOON_COMPACTION
 /*
  * virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -512,14 +742,23 @@ static struct file_system_type balloon_fs = {
 
 #endif /* CONFIG_BALLOON_COMPACTION */
 
-static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
-						  struct shrink_control *sc)
+static unsigned long shrink_free_pages(struct virtio_balloon *vb,
+				       unsigned long pages_to_free)
 {
-	unsigned long pages_to_free, pages_freed = 0;
-	struct virtio_balloon *vb = container_of(shrinker,
-					struct virtio_balloon, shrinker);
+	unsigned long blocks_to_free, blocks_freed;
 
-	pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+	pages_to_free = round_up(pages_to_free,
+				 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+	blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);
+
+	return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+}
+
+static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
+					  unsigned long pages_to_free)
+{
+	unsigned long pages_freed = 0;
 
 	/*
 	 * One invocation of leak_balloon can deflate at most
@@ -527,12 +766,33 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
 	 * multiple times to deflate pages till reaching pages_to_free.
 	 */
 	while (vb->num_pages && pages_to_free) {
+		pages_freed += leak_balloon(vb, pages_to_free) /
+					VIRTIO_BALLOON_PAGES_PER_PAGE;
 		pages_to_free -= pages_freed;
-		pages_freed += leak_balloon(vb, pages_to_free);
 	}
 	update_balloon_size(vb);
 
-	return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	return pages_freed;
+}
+
+static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
+						  struct shrink_control *sc)
+{
+	unsigned long pages_to_free, pages_freed = 0;
+	struct virtio_balloon *vb = container_of(shrinker,
+					struct virtio_balloon, shrinker);
+
+	pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		pages_freed = shrink_free_pages(vb, pages_to_free);
+
+	if (pages_freed >= pages_to_free)
+		return pages_freed;
+
+	pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
+
+	return pages_freed;
 }
 
 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
@@ -540,8 +800,12 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
 {
 	struct virtio_balloon *vb = container_of(shrinker,
 					struct virtio_balloon, shrinker);
+	unsigned long count;
 
-	return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+
+	return count;
 }
 
 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -604,6 +868,31 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	}
 	vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
 #endif
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		/*
+		 * There is always one entry reserved for cmd id, so the ring
+		 * size needs to be at least two to report free page hints.
+		 */
+		if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
+			err = -ENOSPC;
+			goto out_del_vqs;
+		}
+		vb->balloon_wq = alloc_workqueue("balloon-wq",
+					WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
+		if (!vb->balloon_wq) {
+			err = -ENOMEM;
+			goto out_del_vqs;
+		}
+		INIT_WORK(&vb->report_free_page_work, report_free_page_func);
+		vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP;
+		vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
+						  VIRTIO_BALLOON_CMD_ID_STOP);
+		vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
+						  VIRTIO_BALLOON_CMD_ID_STOP);
+		vb->num_free_page_blocks = 0;
+		spin_lock_init(&vb->free_page_list_lock);
+		INIT_LIST_HEAD(&vb->free_page_list);
+	}
 	/*
 	 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
 	 * shrinker needs to be registered to relieve memory pressure.
@@ -611,7 +900,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
 		err = virtio_balloon_register_shrinker(vb);
 		if (err)
-			goto out_del_vqs;
+			goto out_del_balloon_wq;
 	}
 	virtio_device_ready(vdev);
 
@@ -619,6 +908,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
 		virtballoon_changed(vdev);
 	return 0;
 
+out_del_balloon_wq:
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		destroy_workqueue(vb->balloon_wq);
 out_del_vqs:
 	vdev->config->del_vqs(vdev);
 out_free_vb:
@@ -652,6 +944,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	cancel_work_sync(&vb->update_balloon_size_work);
 	cancel_work_sync(&vb->update_balloon_stats_work);
 
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		cancel_work_sync(&vb->report_free_page_work);
+		destroy_workqueue(vb->balloon_wq);
+	}
+
 	remove_common(vb);
 #ifdef CONFIG_BALLOON_COMPACTION
 	if (vb->vb_dev_info.inode)
@@ -703,6 +1000,7 @@ static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 13b8cb5..47c9eb4 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,15 +34,20 @@
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
 
+#define VIRTIO_BALLOON_CMD_ID_STOP	0
+#define VIRTIO_BALLOON_CMD_ID_DONE	1
 struct virtio_balloon_config {
 	/* Number of pages host wants Guest to give up. */
 	__u32 num_pages;
 	/* Number of pages we've actually got in balloon. */
 	__u32 actual;
+	/* Free page report command id, readonly by guest */
+	__u32 free_page_report_cmd_id;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
-- 
2.7.4

^ permalink raw reply related

* [PATCH v37 0/3] Virtio-balloon: support free page reporting
From: Wei Wang @ 2018-08-27  1:32 UTC (permalink / raw)
  To: virtio-dev, linux-kernel, virtualization, kvm, linux-mm, mst,
	mhocko, akpm, dgilbert
  Cc: yang.zhang.wz, riel, quan.xu0, liliang.opensource, pbonzini,
	nilal, torvalds

The new feature, VIRTIO_BALLOON_F_FREE_PAGE_HINT, implemented by this
series enables the virtio-balloon driver to report hints of guest free
pages to host. It can be used to accelerate virtual machine (VM) live
migration. Here is an introduction of this usage:

Live migration needs to transfer the VM's memory from the source machine
to the destination round by round. For the 1st round, all the VM's memory
is transferred. From the 2nd round, only the pieces of memory that were
written by the guest (after the 1st round) are transferred. One method
that is popularly used by the hypervisor to track which part of memory is
written is to have the hypervisor write-protect all the guest memory.

This feature enables the optimization by skipping the transfer of guest
free pages during VM live migration. It is not concerned that the memory
pages are used after they are given to the hypervisor as a hint of the
free pages, because they will be tracked by the hypervisor and transferred
in the subsequent round if they are used and written.

* Tests
1 Test Environment
    Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz
    Migration setup: migrate_set_speed 100G, migrate_set_downtime 400ms

2 Test Results (results are averaged over several repeated runs)
    2.1 Guest setup: 8G RAM, 4 vCPU
        2.1.1 Idle guest live migration time
            Optimization v.s. Legacy = 620ms vs 2970ms
            --> ~79% reduction
        2.1.2 Guest live migration with Linux compilation workload
          (i.e. make bzImage -j4) running
          1) Live Migration Time:
             Optimization v.s. Legacy = 2273ms v.s. 4502ms
             --> ~50% reduction
          2) Linux Compilation Time:
             Optimization v.s. Legacy = 8min42s v.s. 8min43s
             --> no obvious difference

    2.2 Guest setup: 128G RAM, 4 vCPU
        2.2.1 Idle guest live migration time
            Optimization v.s. Legacy = 5294ms vs 41651ms
            --> ~87% reduction
        2.2.2 Guest live migration with Linux compilation workload
          1) Live Migration Time:
            Optimization v.s. Legacy = 8816ms v.s. 54201ms
            --> 84% reduction
          2) Linux Compilation Time:
             Optimization v.s. Legacy = 8min30s v.s. 8min36s
             --> no obvious difference

ChangeLog:
v36->v37:
    - free the reported pages to mm when receives a DONE cmd from host.
      Please see patch 1's commit log for reasons. Please see patch 1's
      commit for detailed explanations.

For ChangeLogs from v22 to v36, please reference
https://lkml.org/lkml/2018/7/20/199

For ChangeLogs before v21, please reference
https://lwn.net/Articles/743660/

Wei Wang (3):
  virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
  mm/page_poison: expose page_poisoning_enabled to kernel modules
  virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON

 drivers/virtio/virtio_balloon.c     | 374 ++++++++++++++++++++++++++++++++----
 include/uapi/linux/virtio_balloon.h |   8 +
 mm/page_poison.c                    |   6 +
 3 files changed, 355 insertions(+), 33 deletions(-)

-- 
2.7.4

^ permalink raw reply

* Re: [PATCH net] vhost: correctly check the iova range when waking virtqueue
From: David Miller @ 2018-08-26  0:40 UTC (permalink / raw)
  To: jasowang; +Cc: kvm, mst, netdev, linux-kernel, virtualization
In-Reply-To: <20180824085313.21798-1-jasowang@redhat.com>

From: Jason Wang <jasowang@redhat.com>
Date: Fri, 24 Aug 2018 16:53:13 +0800

> We don't wakeup the virtqueue if the first byte of pending iova range
> is the last byte of the range we just got updated. This will lead a
> virtqueue to wait for IOTLB updating forever. Fixing by correct the
> check and wake up the virtqueue in this case.
> 
> Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
> Reported-by: Peter Xu <peterx@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> The patch is needed for -stable.

Applied and queued up for -stable, thanks Jason.

^ permalink raw reply

* Re: [PATCH v2 10/11] x86/paravirt: move the Xen-only pv_mmu_ops under the PARAVIRT_XXL umbrella
From: Juergen Gross @ 2018-08-24 14:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: rusty, x86, linux-kernel, virtualization, mingo, tglx, hpa,
	xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180824141218.GP24124@hirez.programming.kicks-ass.net>

On 24/08/18 16:12, Peter Zijlstra wrote:
> On Mon, Aug 13, 2018 at 09:37:38AM +0200, Juergen Gross wrote:
>>  struct pv_mmu_ops {
>> +	/* TLB operations */
>> +	void (*flush_tlb_user)(void);
>> +	void (*flush_tlb_kernel)(void);
>> +	void (*flush_tlb_one_user)(unsigned long addr);
>> +	void (*flush_tlb_others)(const struct cpumask *cpus,
>> +				 const struct flush_tlb_info *info);
>> +
>> +	/* Hook for intercepting the destruction of an mm_struct. */
>> +	void (*exit_mmap)(struct mm_struct *mm);
> 
> Right, so I just wrecked that for you by adding a new:
> tlb_remove_table virt function. But I don't suppose that's a difficult
> thing to fix up.

Right. This will stay outside of XXL, I think. :-)


Juergen

^ permalink raw reply

* Re: [PATCH v2 09/11] x86/paravirt: move the Xen-only pv_irq_ops under the PARAVIRT_XXL umbrella
From: Juergen Gross @ 2018-08-24 14:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: rusty, x86, linux-kernel, virtualization, mingo, tglx, hpa,
	xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180824141045.GO24124@hirez.programming.kicks-ass.net>

On 24/08/18 16:10, Peter Zijlstra wrote:
> On Mon, Aug 13, 2018 at 09:37:37AM +0200, Juergen Gross wrote:
>> Some of the paravirt ops defined in pv_irq_ops are for Xen PV guests
>> only. Define them only if CONFIG_PARAVIRT_XXL is set.
>> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
>> index e652ec27d945..ae53ee36d8fb 100644
>> --- a/arch/x86/include/asm/paravirt_types.h
>> +++ b/arch/x86/include/asm/paravirt_types.h
>> @@ -197,8 +197,10 @@ struct pv_irq_ops {
>>  	struct paravirt_callee_save irq_disable;
>>  	struct paravirt_callee_save irq_enable;
>>  
>> +#ifdef CONFIG_PARAVIRT_XXL
>>  	void (*safe_halt)(void);
>>  	void (*halt)(void);
>> +#endif
> 
> that makes me sad... but it appears VSMP also uses them. Can't you
> simply make VSMP also select XXL, I don't think that's used quite as
> much as Xen is :-)
> 

Sure, why not?

Any objections?


Juergen

^ permalink raw reply

* Re: [PATCH v2 00/11] x86/paravirt: several cleanups
From: Peter Zijlstra @ 2018-08-24 14:13 UTC (permalink / raw)
  To: Juergen Gross
  Cc: rusty, x86, linux-kernel, virtualization, mingo, tglx, hpa,
	xen-devel, akataria, boris.ostrovsky
In-Reply-To: <45bfe8ab-683f-ab79-e3c6-c0a707b667c2@suse.com>

On Fri, Aug 24, 2018 at 03:52:55PM +0200, Juergen Gross wrote:
> Ping?

Looking good; although I messed it up a little bit by adding a new
paravirt function.

Thanks for doing this!

^ permalink raw reply

* Re: [PATCH v2 10/11] x86/paravirt: move the Xen-only pv_mmu_ops under the PARAVIRT_XXL umbrella
From: Peter Zijlstra @ 2018-08-24 14:12 UTC (permalink / raw)
  To: Juergen Gross
  Cc: rusty, x86, linux-kernel, virtualization, mingo, tglx, hpa,
	xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180813073739.26108-11-jgross@suse.com>

On Mon, Aug 13, 2018 at 09:37:38AM +0200, Juergen Gross wrote:
>  struct pv_mmu_ops {
> +	/* TLB operations */
> +	void (*flush_tlb_user)(void);
> +	void (*flush_tlb_kernel)(void);
> +	void (*flush_tlb_one_user)(unsigned long addr);
> +	void (*flush_tlb_others)(const struct cpumask *cpus,
> +				 const struct flush_tlb_info *info);
> +
> +	/* Hook for intercepting the destruction of an mm_struct. */
> +	void (*exit_mmap)(struct mm_struct *mm);

Right, so I just wrecked that for you by adding a new:
tlb_remove_table virt function. But I don't suppose that's a difficult
thing to fix up.

^ permalink raw reply

* Re: [PATCH v2 09/11] x86/paravirt: move the Xen-only pv_irq_ops under the PARAVIRT_XXL umbrella
From: Peter Zijlstra @ 2018-08-24 14:10 UTC (permalink / raw)
  To: Juergen Gross
  Cc: rusty, x86, linux-kernel, virtualization, mingo, tglx, hpa,
	xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180813073739.26108-10-jgross@suse.com>

On Mon, Aug 13, 2018 at 09:37:37AM +0200, Juergen Gross wrote:
> Some of the paravirt ops defined in pv_irq_ops are for Xen PV guests
> only. Define them only if CONFIG_PARAVIRT_XXL is set.
> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> index e652ec27d945..ae53ee36d8fb 100644
> --- a/arch/x86/include/asm/paravirt_types.h
> +++ b/arch/x86/include/asm/paravirt_types.h
> @@ -197,8 +197,10 @@ struct pv_irq_ops {
>  	struct paravirt_callee_save irq_disable;
>  	struct paravirt_callee_save irq_enable;
>  
> +#ifdef CONFIG_PARAVIRT_XXL
>  	void (*safe_halt)(void);
>  	void (*halt)(void);
> +#endif

that makes me sad... but it appears VSMP also uses them. Can't you
simply make VSMP also select XXL, I don't think that's used quite as
much as Xen is :-)

^ permalink raw reply

* Re: [PATCH v2 03/11] x86/paravirt: remove clobbers from struct paravirt_patch_site
From: Thomas Gleixner @ 2018-08-24 14:03 UTC (permalink / raw)
  To: Juergen Gross
  Cc: rusty, peterz, x86, linux-kernel, virtualization, mingo, hpa,
	xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180813073739.26108-4-jgross@suse.com>

On Mon, 13 Aug 2018, Juergen Gross wrote:

> There is no need any longer to store the clobbers in struct
> paravirt_patch_site. Remove clobbers from the struct and from the
> related macros.
> 
> While at it fix some lines longer than 80 characters.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply

* Re: [PATCH v2 02/11] x86/paravirt: remove clobbers parameter from paravirt patch functions
From: Thomas Gleixner @ 2018-08-24 14:01 UTC (permalink / raw)
  To: Juergen Gross
  Cc: rusty, peterz, x86, linux-kernel, virtualization, mingo, hpa,
	xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180813073739.26108-3-jgross@suse.com>

On Mon, 13 Aug 2018, Juergen Gross wrote:

> The clobbers parameter from paravirt_patch_default() et al isn't used
> any longer. Remove it.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply

* Re: [PATCH v2 01/11] x86/paravirt: make paravirt_patch_call() and paravirt_patch_jmp() static
From: Thomas Gleixner @ 2018-08-24 14:00 UTC (permalink / raw)
  To: Juergen Gross
  Cc: rusty, peterz, x86, linux-kernel, virtualization, mingo, hpa,
	xen-devel, akataria, boris.ostrovsky
In-Reply-To: <20180813073739.26108-2-jgross@suse.com>

On Mon, 13 Aug 2018, Juergen Gross wrote:

> paravirt_patch_call() and paravirt_patch_jmp() are used in paravirt.c
> only. Convert them to static.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply

* Re: [PATCH v2 00/11] x86/paravirt: several cleanups
From: Juergen Gross @ 2018-08-24 13:52 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86, virtualization
  Cc: boris.ostrovsky, rusty, peterz, mingo, hpa, akataria, tglx
In-Reply-To: <20180813073739.26108-1-jgross@suse.com>

On 13/08/18 09:37, Juergen Gross wrote:
> This series removes some no longer needed stuff from paravirt
> infrastructure and puts large quantities of paravirt ops under a new
> config option PARAVIRT_XXL which is selected by XEN_PV only.
> 
> A pvops kernel without XEN_PV being configured is about 2.5% smaller
> with this series applied.
> 
> tip commit 5800dc5c19f34e6e03b5adab1282535cb102fafd ("x86/paravirt:
> Fix spectre-v2 mitigations for paravirt guests") is a prerequisite
> for this series.
> 
> The last 4 patches of this series require my Xen cleanup series
> https://lore.kernel.org/lkml/20180717120113.12756-1-jgross@suse.com/
> which hides more Xen PV-only code behind CONFIG_XEN_PV.
> 
> Changes in V2:
> - patch 4: shorten pv_ops sub-structure names (Jan Beulich)
> - patch 11: new patch
> 
> Juergen Gross (11):
>   x86/paravirt: make paravirt_patch_call() and paravirt_patch_jmp()
>     static
>   x86/paravirt: remove clobbers parameter from paravirt patch functions
>   x86/paravirt: remove clobbers from struct paravirt_patch_site
>   x86/paravirt: use a single ops structure
>   x86/paravirt: remove unused paravirt bits
>   x86/paravirt: introduce new config option PARAVIRT_XXL
>   x86/paravirt: move items in pv_info under PARAVIRT_XXL umbrella
>   x86/paravirt: move the Xen-only pv_cpu_ops under the PARAVIRT_XXL
>     umbrella
>   x86/paravirt: move the Xen-only pv_irq_ops under the PARAVIRT_XXL
>     umbrella
>   x86/paravirt: move the Xen-only pv_mmu_ops under the PARAVIRT_XXL
>     umbrella
>   x86/paravirt: remove unneeded mmu related paravirt ops bits
> 
>  arch/x86/Kconfig                            |   3 +
>  arch/x86/hyperv/mmu.c                       |   4 +-
>  arch/x86/include/asm/debugreg.h             |   2 +-
>  arch/x86/include/asm/desc.h                 |   4 +-
>  arch/x86/include/asm/fixmap.h               |   2 +-
>  arch/x86/include/asm/irqflags.h             |  56 ++--
>  arch/x86/include/asm/mmu_context.h          |   4 +-
>  arch/x86/include/asm/msr.h                  |   4 +-
>  arch/x86/include/asm/paravirt.h             | 399 +++++++++++++---------------
>  arch/x86/include/asm/paravirt_types.h       |  77 +++---
>  arch/x86/include/asm/pgalloc.h              |   2 +-
>  arch/x86/include/asm/pgtable-3level_types.h |   2 +-
>  arch/x86/include/asm/pgtable.h              |   7 +-
>  arch/x86/include/asm/processor.h            |   4 +-
>  arch/x86/include/asm/ptrace.h               |   3 +-
>  arch/x86/include/asm/segment.h              |   2 +-
>  arch/x86/include/asm/special_insns.h        |   4 +-
>  arch/x86/kernel/alternative.c               |   2 +-
>  arch/x86/kernel/asm-offsets.c               |  13 +-
>  arch/x86/kernel/asm-offsets_64.c            |   9 +-
>  arch/x86/kernel/cpu/common.c                |   4 +-
>  arch/x86/kernel/cpu/vmware.c                |   4 +-
>  arch/x86/kernel/head_64.S                   |   2 +-
>  arch/x86/kernel/kvm.c                       |  17 +-
>  arch/x86/kernel/kvmclock.c                  |   4 +-
>  arch/x86/kernel/paravirt-spinlocks.c        |  15 +-
>  arch/x86/kernel/paravirt.c                  | 292 ++++++++++----------
>  arch/x86/kernel/paravirt_patch_32.c         |  57 ++--
>  arch/x86/kernel/paravirt_patch_64.c         |  65 ++---
>  arch/x86/kernel/tsc.c                       |   2 +-
>  arch/x86/kernel/vsmp_64.c                   |  24 +-
>  arch/x86/xen/Kconfig                        |   1 +
>  arch/x86/xen/enlighten_pv.c                 |  31 ++-
>  arch/x86/xen/irq.c                          |   2 +-
>  arch/x86/xen/mmu_hvm.c                      |   2 +-
>  arch/x86/xen/mmu_pv.c                       |  28 +-
>  arch/x86/xen/spinlock.c                     |  11 +-
>  arch/x86/xen/time.c                         |   4 +-
>  drivers/xen/time.c                          |   2 +-
>  39 files changed, 575 insertions(+), 595 deletions(-)
> 

Ping?


Juergen

^ permalink raw reply

* Re: [PATCH net] vhost: correctly check the iova range when waking virtqueue
From: Michael S. Tsirkin @ 2018-08-24 11:02 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20180824085313.21798-1-jasowang@redhat.com>

On Fri, Aug 24, 2018 at 04:53:13PM +0800, Jason Wang wrote:
> We don't wakeup the virtqueue if the first byte of pending iova range
> is the last byte of the range we just got updated. This will lead a
> virtqueue to wait for IOTLB updating forever. Fixing by correct the
> check and wake up the virtqueue in this case.
> 
> Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
> Reported-by: Peter Xu <peterx@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Acked-by: Michael S. Tsirkin <mst@redhat.com>

> ---
> The patch is needed for -stable.
> ---
>  drivers/vhost/vhost.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 96c1d8400822..b13c6b4b2c66 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -952,7 +952,7 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
>  	list_for_each_entry_safe(node, n, &d->pending_list, node) {
>  		struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb;
>  		if (msg->iova <= vq_msg->iova &&
> -		    msg->iova + msg->size - 1 > vq_msg->iova &&
> +		    msg->iova + msg->size - 1 >= vq_msg->iova &&
>  		    vq_msg->type == VHOST_IOTLB_MISS) {
>  			vhost_poll_queue(&node->vq->poll);
>  			list_del(&node->node);
> -- 
> 2.17.1

^ permalink raw reply

* Re: [PATCH net] vhost: correctly check the iova range when waking virtqueue
From: Peter Xu @ 2018-08-24  9:36 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, virtualization, linux-kernel, kvm, mst
In-Reply-To: <20180824085313.21798-1-jasowang@redhat.com>

On Fri, Aug 24, 2018 at 04:53:13PM +0800, Jason Wang wrote:
> We don't wakeup the virtqueue if the first byte of pending iova range
> is the last byte of the range we just got updated. This will lead a
> virtqueue to wait for IOTLB updating forever. Fixing by correct the
> check and wake up the virtqueue in this case.
> 
> Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
> Reported-by: Peter Xu <peterx@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Without this patch, this command will trigger the IO hang merely every
time from host to guest:

  netperf -H 1.2.3.4 -l 5 -t TCP_RR -- -b 100

After applying, I can run it 10 times continuously without a problem.

Reviewed-by: Peter Xu <peterx@redhat.com>
Tested-by: Peter Xu <peterx@redhat.com>

Thanks,

-- 
Peter Xu

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox