LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] exec: cleanup the execve wrappers
From: Christoph Hellwig @ 2020-06-18 14:46 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-arch, linux-s390, linux-parisc, Arnd Bergmann, Brian Gerst,
	x86, linux-mips, linux-kernel, linux-fsdevel, Luis Chamberlain,
	sparclinux, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20200618144627.114057-1-hch@lst.de>

Remove a whole bunch of wrappers that eventually all call
__do_execve_file, and consolidate the execve helpers to:

  (1) __do_execveat, which is the lowest level helper implementing the
      actual functionality
  (2) do_execveat, which is used by all callers that want native
      pointers
  (3) do_compat_execve, which is used by all compat syscalls

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c               | 98 +++++++++++------------------------------
 include/linux/binfmts.h | 12 ++---
 init/main.c             |  7 +--
 kernel/umh.c            | 16 +++----
 4 files changed, 41 insertions(+), 92 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a7032784..354fdaa536ae7d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1815,10 +1815,7 @@ static int exec_binprm(struct linux_binprm *bprm)
 	return 0;
 }
 
-/*
- * sys_execve() executes a new program.
- */
-static int __do_execve_file(int fd, struct filename *filename,
+static int __do_execveat(int fd, struct filename *filename,
 			    struct user_arg_ptr argv,
 			    struct user_arg_ptr envp,
 			    int flags, struct file *file)
@@ -1972,74 +1969,16 @@ static int __do_execve_file(int fd, struct filename *filename,
 	return retval;
 }
 
-static int do_execveat_common(int fd, struct filename *filename,
-			      struct user_arg_ptr argv,
-			      struct user_arg_ptr envp,
-			      int flags)
-{
-	return __do_execve_file(fd, filename, argv, envp, flags, NULL);
-}
-
-int do_execve_file(struct file *file, void *__argv, void *__envp)
-{
-	struct user_arg_ptr argv = { .ptr.native = __argv };
-	struct user_arg_ptr envp = { .ptr.native = __envp };
-
-	return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
-}
-
-int do_execve(struct filename *filename,
-	const char __user *const __user *__argv,
-	const char __user *const __user *__envp)
-{
-	struct user_arg_ptr argv = { .ptr.native = __argv };
-	struct user_arg_ptr envp = { .ptr.native = __envp };
-	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
-}
-
 int do_execveat(int fd, struct filename *filename,
 		const char __user *const __user *__argv,
 		const char __user *const __user *__envp,
-		int flags)
+		int flags, struct file *file)
 {
 	struct user_arg_ptr argv = { .ptr.native = __argv };
 	struct user_arg_ptr envp = { .ptr.native = __envp };
 
-	return do_execveat_common(fd, filename, argv, envp, flags);
-}
-
-#ifdef CONFIG_COMPAT
-static int compat_do_execve(struct filename *filename,
-	const compat_uptr_t __user *__argv,
-	const compat_uptr_t __user *__envp)
-{
-	struct user_arg_ptr argv = {
-		.is_compat = true,
-		.ptr.compat = __argv,
-	};
-	struct user_arg_ptr envp = {
-		.is_compat = true,
-		.ptr.compat = __envp,
-	};
-	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
-}
-
-static int compat_do_execveat(int fd, struct filename *filename,
-			      const compat_uptr_t __user *__argv,
-			      const compat_uptr_t __user *__envp,
-			      int flags)
-{
-	struct user_arg_ptr argv = {
-		.is_compat = true,
-		.ptr.compat = __argv,
-	};
-	struct user_arg_ptr envp = {
-		.is_compat = true,
-		.ptr.compat = __envp,
-	};
-	return do_execveat_common(fd, filename, argv, envp, flags);
+	return __do_execveat(fd, filename, argv, envp, flags, file);
 }
-#endif
 
 void set_binfmt(struct linux_binfmt *new)
 {
@@ -2070,7 +2009,7 @@ SYSCALL_DEFINE3(execve,
 		const char __user *const __user *, argv,
 		const char __user *const __user *, envp)
 {
-	return do_execve(getname(filename), argv, envp);
+	return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
 }
 
 SYSCALL_DEFINE5(execveat,
@@ -2080,18 +2019,34 @@ SYSCALL_DEFINE5(execveat,
 		int, flags)
 {
 	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+	struct filename *name = getname_flags(filename, lookup_flags, NULL);
 
-	return do_execveat(fd,
-			   getname_flags(filename, lookup_flags, NULL),
-			   argv, envp, flags);
+	return do_execveat(fd, name, argv, envp, flags, NULL);
 }
 
 #ifdef CONFIG_COMPAT
+static int do_compat_execve(int fd, struct filename *filename,
+		const compat_uptr_t __user *__argv,
+		const compat_uptr_t __user *__envp,
+		int flags)
+{
+	struct user_arg_ptr argv = {
+		.is_compat = true,
+		.ptr.compat = __argv,
+	};
+	struct user_arg_ptr envp = {
+		.is_compat = true,
+		.ptr.compat = __envp,
+	};
+
+	return __do_execveat(fd, filename, argv, envp, flags, NULL);
+}
+
 COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
 	const compat_uptr_t __user *, argv,
 	const compat_uptr_t __user *, envp)
 {
-	return compat_do_execve(getname(filename), argv, envp);
+	return do_compat_execve(AT_FDCWD, getname(filename), argv, envp, 0);
 }
 
 COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
@@ -2101,9 +2056,8 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
 		       int,  flags)
 {
 	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+	struct filename *name = getname_flags(filename, lookup_flags, NULL);
 
-	return compat_do_execveat(fd,
-				  getname_flags(filename, lookup_flags, NULL),
-				  argv, envp, flags);
+	return do_compat_execve(fd, name, argv, envp, flags);
 }
 #endif
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 4a20b7517dd036..bed702e4b1fbd9 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -134,13 +134,9 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
 
-extern int do_execve(struct filename *,
-		     const char __user * const __user *,
-		     const char __user * const __user *);
-extern int do_execveat(int, struct filename *,
-		       const char __user * const __user *,
-		       const char __user * const __user *,
-		       int);
-int do_execve_file(struct file *file, void *__argv, void *__envp);
+int do_execveat(int fd, struct filename *filename,
+		const char __user *const __user *__argv,
+		const char __user *const __user *__envp,
+		int flags, struct file *file);
 
 #endif /* _LINUX_BINFMTS_H */
diff --git a/init/main.c b/init/main.c
index 0ead83e86b5aa2..838950ea7bca22 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1329,9 +1329,10 @@ static int run_init_process(const char *init_filename)
 	pr_debug("  with environment:\n");
 	for (p = envp_init; *p; p++)
 		pr_debug("    %s\n", *p);
-	return do_execve(getname_kernel(init_filename),
-		(const char __user *const __user *)argv_init,
-		(const char __user *const __user *)envp_init);
+	return do_execveat(AT_FDCWD, getname_kernel(init_filename),
+			(const char __user *const __user *)argv_init,
+			(const char __user *const __user *)envp_init,
+			0, NULL);
 }
 
 static int try_to_run_init_process(const char *init_filename)
diff --git a/kernel/umh.c b/kernel/umh.c
index 79f139a7ca03c6..7aa9a5817582ca 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -103,15 +103,13 @@ static int call_usermodehelper_exec_async(void *data)
 	commit_creds(new);
 
 	sub_info->pid = task_pid_nr(current);
-	if (sub_info->file) {
-		retval = do_execve_file(sub_info->file,
-					sub_info->argv, sub_info->envp);
-		if (!retval)
-			current->flags |= PF_UMH;
-	} else
-		retval = do_execve(getname_kernel(sub_info->path),
-				   (const char __user *const __user *)sub_info->argv,
-				   (const char __user *const __user *)sub_info->envp);
+	retval = do_execveat(AT_FDCWD,
+			sub_info->path ? getname_kernel(sub_info->path) : NULL,
+			(const char __user *const __user *)sub_info->argv,
+			(const char __user *const __user *)sub_info->envp,
+			0, sub_info->file);
+	if (sub_info->file && !retval)
+		current->flags |= PF_UMH;
 out:
 	sub_info->retval = retval;
 	/*
-- 
2.26.2


^ permalink raw reply related

* properly support exec and wait with kernel pointers v2
From: Christoph Hellwig @ 2020-06-18 14:46 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-arch, linux-s390, linux-parisc, Arnd Bergmann, Brian Gerst,
	x86, linux-mips, linux-kernel, linux-fsdevel, Luis Chamberlain,
	sparclinux, linuxppc-dev, linux-arm-kernel

Hi all,

this series first cleans up the exec code and then adds proper
kernel_execveat and kernel_wait callers instead of relying on the fact
that the early init code and kernel threads implicitly run with
the address limit set to KERNEL_DS.

Note that the cleanup removes the compat execve(at) handlers entirely, as
we can handle the compat difference very nicely in a unified codebase.
x32 needs two hacky #defines for that for now, although those can go
away if the x32 syscall rework from Brian gets merged.

Changes since v1:
 - remove a pointless ifdef from get_user_arg_ptr
 - remove the need for a compat syscall handler for x32


Diffstat:
 arch/arm64/include/asm/unistd32.h                  |    4 
 arch/mips/kernel/syscalls/syscall_n32.tbl          |    4 
 arch/mips/kernel/syscalls/syscall_o32.tbl          |    4 
 arch/parisc/kernel/syscalls/syscall.tbl            |    4 
 arch/powerpc/kernel/syscalls/syscall.tbl           |    4 
 arch/s390/kernel/syscalls/syscall.tbl              |    4 
 arch/sparc/kernel/syscalls.S                       |    4 
 arch/x86/entry/syscall_x32.c                       |    7 
 arch/x86/entry/syscalls/syscall_32.tbl             |    4 
 arch/x86/entry/syscalls/syscall_64.tbl             |    4 
 fs/exec.c                                          |  248 ++++++++-------------
 include/linux/binfmts.h                            |   10 
 include/linux/compat.h                             |    7 
 include/linux/sched/task.h                         |    1 
 include/uapi/asm-generic/unistd.h                  |    4 
 init/main.c                                        |    5 
 kernel/exit.c                                      |   16 +
 kernel/umh.c                                       |   43 ---
 tools/include/uapi/asm-generic/unistd.h            |    4 
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |    4 
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    |    4 
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |    4 
 22 files changed, 170 insertions(+), 223 deletions(-)

^ permalink raw reply

* [PATCH] mm/debug_vm_pgtable: Fix build failure with powerpc 8xx
From: Christophe Leroy @ 2020-06-18 14:31 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Will Deacon, Andrew Morton, Peter Zijlstra (Intel),
	Anshuman Khandual
  Cc: linux-mm, linuxppc-dev, linux-kernel

Since commit 9e343b467c70 ("READ_ONCE: Enforce atomicity for
{READ,WRITE}_ONCE() memory accesses"), READ_ONCE() cannot be used
anymore to read complex page table entries. This leads to:

  CC      mm/debug_vm_pgtable.o
In file included from ./include/asm-generic/bug.h:5,
                 from ./arch/powerpc/include/asm/bug.h:109,
                 from ./include/linux/bug.h:5,
                 from ./include/linux/mmdebug.h:5,
                 from ./include/linux/gfp.h:5,
                 from mm/debug_vm_pgtable.c:13:
In function 'pte_clear_tests',
    inlined from 'debug_vm_pgtable' at mm/debug_vm_pgtable.c:363:2:
./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_210' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE().
  392 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
      |                                      ^
./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert'
  373 |    prefix ## suffix();    \
      |    ^~~~~~
./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert'
  392 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
      |  ^~~~~~~~~~~~~~~~~~~
./include/linux/compiler.h:405:2: note: in expansion of macro 'compiletime_assert'
  405 |  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
      |  ^~~~~~~~~~~~~~~~~~
./include/linux/compiler.h:291:2: note: in expansion of macro 'compiletime_assert_rwonce_type'
  291 |  compiletime_assert_rwonce_type(x);    \
      |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mm/debug_vm_pgtable.c:249:14: note: in expansion of macro 'READ_ONCE'
  249 |  pte_t pte = READ_ONCE(*ptep);
      |              ^~~~~~~~~
make[2]: *** [mm/debug_vm_pgtable.o] Error 1

Fix it by using the recently added ptep_get() helper.

Fixes: 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 mm/debug_vm_pgtable.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index e45623016aea..61ab16fb2e36 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -246,13 +246,13 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
 static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
 				   unsigned long vaddr)
 {
-	pte_t pte = READ_ONCE(*ptep);
+	pte_t pte = ptep_get(ptep);
 
 	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
 	set_pte_at(mm, vaddr, ptep, pte);
 	barrier();
 	pte_clear(mm, vaddr, ptep);
-	pte = READ_ONCE(*ptep);
+	pte = ptep_get(ptep);
 	WARN_ON(!pte_none(pte));
 }
 
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH 3/3] powerpc/8xx: Provide ptep_get() with 16k pages
From: Christophe Leroy @ 2020-06-18 14:21 UTC (permalink / raw)
  To: Michael Ellerman, Peter Zijlstra
  Cc: Will Deacon, linux-kernel, linux-mm, Paul Mackerras,
	Andrew Morton, linuxppc-dev
In-Reply-To: <87pn9xchql.fsf@mpe.ellerman.id.au>



Le 18/06/2020 à 02:58, Michael Ellerman a écrit :
> Peter Zijlstra <peterz@infradead.org> writes:
>> On Thu, Jun 18, 2020 at 12:21:22AM +1000, Michael Ellerman wrote:
>>> Peter Zijlstra <peterz@infradead.org> writes:
>>>> On Mon, Jun 15, 2020 at 12:57:59PM +0000, Christophe Leroy wrote:
>>
>>>>> +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
>>>>> +#define __HAVE_ARCH_PTEP_GET
>>>>> +static inline pte_t ptep_get(pte_t *ptep)
>>>>> +{
>>>>> +	pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0};
>>>>> +
>>>>> +	return pte;
>>>>> +}
>>>>> +#endif
>>>>
>>>> Would it make sense to have a comment with this magic? The casual reader
>>>> might wonder WTH just happened when he stumbles on this :-)
>>>
>>> I tried writing a helpful comment but it's too late for my brain to form
>>> sensible sentences.
>>>
>>> Christophe can you send a follow-up with a comment explaining it? In
>>> particular the zero entries stand out, it's kind of subtle that those
>>> entries are only populated with the right value when we write to the
>>> page table.
>>
>> static inline pte_t ptep_get(pte_t *ptep)
>> {
>> 	unsigned long val = READ_ONCE(ptep->pte);
>> 	/* 16K pages have 4 identical value 4K entries */
>> 	pte_t pte = {val, val, val, val);
>> 	return pte;
>> }
>>
>> Maybe something like that?
> 
> I think val wants to be pte_basic_t, but otherwise yeah I like that much
> better.
> 

I sent a patch for that.

I'll also send one to fix mm/debug_vm_pgtable.c which also uses 
READ_ONCE() to access page table entries.

Christophe

^ permalink raw reply

* Re: [PATCH 3/3] powerpc/8xx: Provide ptep_get() with 16k pages
From: Christophe Leroy @ 2020-06-18 14:19 UTC (permalink / raw)
  To: Michael Ellerman, Peter Zijlstra
  Cc: Will Deacon, linux-kernel, linux-mm, Paul Mackerras,
	Andrew Morton, linuxppc-dev
In-Reply-To: <87o8phchnu.fsf@mpe.ellerman.id.au>



Le 18/06/2020 à 03:00, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Le 17/06/2020 à 16:38, Peter Zijlstra a écrit :
>>> On Thu, Jun 18, 2020 at 12:21:22AM +1000, Michael Ellerman wrote:
>>>> Peter Zijlstra <peterz@infradead.org> writes:
>>>>> On Mon, Jun 15, 2020 at 12:57:59PM +0000, Christophe Leroy wrote:
>>>
>>>>>> +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
>>>>>> +#define __HAVE_ARCH_PTEP_GET
>>>>>> +static inline pte_t ptep_get(pte_t *ptep)
>>>>>> +{
>>>>>> +	pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0};
>>>>>> +
>>>>>> +	return pte;
>>>>>> +}
>>>>>> +#endif
>>>>>
>>>>> Would it make sense to have a comment with this magic? The casual reader
>>>>> might wonder WTH just happened when he stumbles on this :-)
>>>>
>>>> I tried writing a helpful comment but it's too late for my brain to form
>>>> sensible sentences.
>>>>
>>>> Christophe can you send a follow-up with a comment explaining it? In
>>>> particular the zero entries stand out, it's kind of subtle that those
>>>> entries are only populated with the right value when we write to the
>>>> page table.
>>>
>>> static inline pte_t ptep_get(pte_t *ptep)
>>> {
>>> 	unsigned long val = READ_ONCE(ptep->pte);
>>> 	/* 16K pages have 4 identical value 4K entries */
>>> 	pte_t pte = {val, val, val, val);
>>> 	return pte;
>>> }
>>>
>>> Maybe something like that?
>>
>> This should work as well. Indeed nobody cares about what's in the other
>> three. They are only there to ensure that ptep++ increases the ptep
>> pointer by 16 bytes. Only the HW requires 4 identical values, that's
>> taken care of in set_pte_at() and pte_update().
> 
> Right, but it seems less error-prone to have the in-memory
> representation match what we have in the page table (well that's
> in-memory too but you know what I mean).
> 
>> So we should use the most efficient. Thinking once more, maybe what you
>> propose is the most efficient as there is no need to load another
>> register with value 0 in order to write it in the stack.
> 
> On 64-bit I'd say it makes zero difference, the only thing that's going
> to matter is the load from ptep->pte. I don't know whether that's true
> on the 8xx cores though.

On 8xx core, loading a register with value 0 will take one cycle unless 
there is some bubble left by another instruction (like a load from 
memory or a taken branch). But that's in the noise.

Christophe

^ permalink raw reply

* Re: [PATCH v2 0/2] powerpc/pci: unmap interrupts when a PHB is removed
From: Cédric Le Goater @ 2020-06-18 13:47 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Oliver O'Halloran
In-Reply-To: <20200617162938.743439-1-clg@kaod.org>

On 6/17/20 6:29 PM, Cédric Le Goater wrote:
> Hello,
> 
> When a passthrough IO adapter is removed from a pseries machine using
> hash MMU and the XIVE interrupt mode, the POWER hypervisor expects the
> guest OS to clear all page table entries related to the adapter. If
> some are still present, the RTAS call which isolates the PCI slot
> returns error 9001 "valid outstanding translations" and the removal of
> the IO adapter fails. This is because when the PHBs are scanned, Linux
> maps automatically some interrupts in the Linux interrupt number space
> but these are never removed.
> 
> To solve this problem, we introduce a PPC platform specific
> pcibios_remove_bus() routine which clears all interrupt mappings when
> the bus is removed. This also clears the associated page table entries
> of the ESB pages when using XIVE.
> 
> For this purpose, we record the logical interrupt numbers of the
> mapped interrupt under the PHB structure and let pcibios_remove_bus()
> do the clean up.
> 
> Tested on :
> 
>   - PowerNV with PCI, OpenCAPI, CAPI and GPU adapters. I don't know
>     how to inject a failure on a PHB but that would be a good test.

I found out that powering down the slot is enough :

	echo 0 > /sys/bus/pci/slots/<slot name>/power

The IRQ cleanup is done as expected on baremetal also.

Cheers,

C. 

>   - KVM P8+P9 guests with passthrough PCI adapters, but PHBs can not
>     be removed under QEMU/KVM.   
>   - PowerVM with passthrough PCI adapters (main target)
>   
> Thanks,
> 
> C.
> 
> Changes since v1:
> 
>  - extended the removal to interrupts other than the legacy INTx.
> 
> Cédric Le Goater (2):
>   powerpc/pci: unmap legacy INTx interrupts when a PHB is removed
>   powerpc/pci: unmap all interrupts when a PHB is removed
> 
>  arch/powerpc/include/asm/pci-bridge.h |   6 ++
>  arch/powerpc/kernel/pci-common.c      | 114 ++++++++++++++++++++++++++
>  2 files changed, 120 insertions(+)
> 


^ permalink raw reply

* Re: [PATCH v2 2/4] KVM: PPC: Book3S HV: track the state GFNs associated with secure VMs
From: Laurent Dufour @ 2020-06-18 13:31 UTC (permalink / raw)
  To: Ram Pai, kvm-ppc, linuxppc-dev
  Cc: cclaudio, bharata, sathnaga, aneesh.kumar, sukadev, bauerman,
	david
In-Reply-To: <1592471945-24786-3-git-send-email-linuxram@us.ibm.com>

Le 18/06/2020 à 11:19, Ram Pai a écrit :
> During the life of SVM, its GFNs transition through normal, secure and
> shared states. Since the kernel does not track GFNs that are shared, it
> is not possible to disambiguate a shared GFN from a GFN whose PFN has
> not yet been migrated to a secure-PFN. Also it is not possible to
> disambiguate a secure-GFN from a GFN that has been paged out from
> the ultravisor.
> 
> The ability to identify the state of a GFN is needed to skip migration of its
> PFN to secure-PFN during ESM transition.
> 
> The code is re-organized to track the states of a GFN as explained
> below.
> 
> ************************************************************************
>   1. States of a GFN
>      ---------------
>   The GFN can be in one of the following states.
> 
>   (a) Secure - The GFN is secure. The GFN is associated with
>   	a Secure VM, the contents of the GFN is not accessible
>   	to the Hypervisor.  This GFN can be backed by a secure-PFN,
>   	or can be backed by a normal-PFN with contents encrypted.
>   	The former is true when the GFN is paged-in into the
>   	ultravisor. The latter is true when the GFN is paged-out
>   	of the ultravisor.
> 
>   (b) Shared - The GFN is shared. The GFN is associated with
>   	a secure VM. The contents of the GFN is accessible to
>   	Hypervisor. This GFN is backed by a normal-PFN and its
>   	content is un-encrypted.
> 
>   (c) Normal - The GFN is normal. The GFN is associated with
>   	a normal VM. The contents of the GFN is accessible to
>   	the Hypervisor. Its content is never encrypted.
> 
>   2. States of a VM.
>      ---------------
> 
>   (a) Normal VM:  A VM whose contents are always accessible to
>   	the hypervisor.  All its GFNs are normal-GFNs.
> 
>   (b) Secure VM: A VM whose contents are not accessible to the
>   	hypervisor without the VM's consent.  Its GFNs are
>   	either Shared-GFN or Secure-GFNs.
> 
>   (c) Transient VM: A Normal VM that is transitioning to secure VM.
>   	The transition starts on successful return of
>   	H_SVM_INIT_START, and ends on successful return
>   	of H_SVM_INIT_DONE. This transient VM, can have GFNs
>   	in any of the three states; i.e Secure-GFN, Shared-GFN,
>   	and Normal-GFN.	The VM never executes in this state
>   	in supervisor-mode.
> 
>   3. Memory slot State.
>      ------------------
>    	The state of a memory slot mirrors the state of the
>    	VM the memory slot is associated with.
> 
>   4. VM State transition.
>      --------------------
> 
>    A VM always starts in Normal Mode.
> 
>    H_SVM_INIT_START moves the VM into transient state. During this
>    time the Ultravisor may request some of its GFNs to be shared or
>    secured. So its GFNs can be in one of the three GFN states.
> 
>    H_SVM_INIT_DONE moves the VM entirely from transient state to
>    secure-state. At this point any left-over normal-GFNs are
>    transitioned to Secure-GFN.
> 
>    H_SVM_INIT_ABORT moves the transient VM back to normal VM.
>    All its GFNs are moved to Normal-GFNs.
> 
>    UV_TERMINATE transitions the secure-VM back to normal-VM. All
>    the secure-GFN and shared-GFNs are transitioned to normal-GFN
>    Note: The contents of the normal-GFN is undefined at this point.
> 
>   5. GFN state implementation:
>      -------------------------
> 
>   Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
>   when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
>   set, and contains the value of the secure-PFN.
>   It is associated with a normal-PFN; also called mem_pfn, when
>   the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
>   The value of the normal-PFN is not tracked.
> 
>   Shared GFN is associated with a normal-PFN. Its pfn[] has
>   KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
>   is not tracked.
> 
>   Normal GFN is associated with normal-PFN. Its pfn[] has
>   no flag set. The value of the normal-PFN is not tracked.
> 
>   6. Life cycle of a GFN
>      --------------------
>   --------------------------------------------------------------
>   |        |     Share  |  Unshare | SVM       |H_SVM_INIT_DONE|
>   |        |operation   |operation | abort/    |               |
>   |        |            |          | terminate |               |
>   -------------------------------------------------------------
>   |        |            |          |           |               |
>   | Secure |     Shared | Secure   |Normal     |Secure         |
>   |        |            |          |           |               |
>   | Shared |     Shared | Secure   |Normal     |Shared         |
>   |        |            |          |           |               |
>   | Normal |     Shared | Secure   |Normal     |Secure         |
>   --------------------------------------------------------------
> 
>   7. Life cycle of a VM
>      --------------------
>   --------------------------------------------------------------------
>   |         |  start    |  H_SVM_  |H_SVM_   |H_SVM_     |UV_SVM_    |
>   |         |  VM       |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE  |
>   |         |           |          |         |           |           |
>   --------- ----------------------------------------------------------
>   |         |           |          |         |           |           |
>   | Normal  | Normal    | Transient|Error    |Error      |Normal     |
>   |         |           |          |         |           |           |
>   | Secure  |   Error   | Error    |Error    |Error      |Normal     |
>   |         |           |          |         |           |           |
>   |Transient|   N/A     | Error    |Secure   |Normal     |Normal     |
>   --------------------------------------------------------------------
> 
> ************************************************************************
> 
> Cc: Paul Mackerras <paulus@ozlabs.org>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Bharata B Rao <bharata@linux.ibm.com>
> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> Cc: Laurent Dufour <ldufour@linux.ibm.com>
> Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
> Cc: David Gibson <david@gibson.dropbear.id.au>
> Cc: Claudio Carvalho <cclaudio@linux.ibm.com>
> Cc: kvm-ppc@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
> Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> ---
>   arch/powerpc/include/asm/kvm_book3s_uvmem.h |   6 +-
>   arch/powerpc/kvm/book3s_64_mmu_radix.c      |   2 +-
>   arch/powerpc/kvm/book3s_hv.c                |   2 +-
>   arch/powerpc/kvm/book3s_hv_uvmem.c          | 195 +++++++++++++++++++++++++---
>   4 files changed, 180 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
> index 5a9834e..f0c5708 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
> @@ -21,7 +21,8 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
>   int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
>   unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
>   void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
> -			     struct kvm *kvm, bool skip_page_out);
> +			     struct kvm *kvm, bool skip_page_out,
> +			     bool purge_gfn);
>   #else
>   static inline int kvmppc_uvmem_init(void)
>   {
> @@ -75,6 +76,7 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
>   
>   static inline void
>   kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
> -			struct kvm *kvm, bool skip_page_out) { }
> +			struct kvm *kvm, bool skip_page_out,
> +			bool purge_gfn) { }
>   #endif /* CONFIG_PPC_UV */
>   #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index 803940d..3448459 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -1100,7 +1100,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
>   	unsigned int shift;
>   
>   	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
> -		kvmppc_uvmem_drop_pages(memslot, kvm, true);
> +		kvmppc_uvmem_drop_pages(memslot, kvm, true, false);

When reviewing the v1 of this series, I asked you the question about the fact 
that the call here is made with purge_gfn = false. Your answer was:

> This function does not know, under what context it is called. Since
> its job is to just flush the memslot, it cannot assume anything
> about purging the pages in the memslot.

Indeed in the case of the memory hotplug operation, this function is called to 
wipe the page from the secure device in the case the pages are secured. In that 
case the purge is required. Indeed, I checked the other call to 
kvmppc_radix_flush_memslot() in kvmppc_core_flush_memslot_hv() and I cannot see 
why in that case too purge_gfn should be false, especially when the memslot is 
reused as detailed in __kvm_set_memory_region() around the call to 
kvm_arch_flush_shadow_memslot().

I'm sorry not to have asked this earlier, but could you please elaborate on this?

>   
>   	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
>   		return;
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 6717d24..6cf80e5 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -5482,7 +5482,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
>   			continue;
>   
>   		kvm_for_each_memslot(memslot, slots) {
> -			kvmppc_uvmem_drop_pages(memslot, kvm, true);
> +			kvmppc_uvmem_drop_pages(memslot, kvm, true, true);
>   			uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
>   		}
>   	}
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index 3599aaa..666d1bb 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -98,7 +98,127 @@
>   static unsigned long *kvmppc_uvmem_bitmap;
>   static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
>   
> -#define KVMPPC_UVMEM_PFN	(1UL << 63)
> +/*
> + * States of a GFN
> + * ---------------
> + * The GFN can be in one of the following states.
> + *
> + * (a) Secure - The GFN is secure. The GFN is associated with
> + *	a Secure VM, the contents of the GFN is not accessible
> + *	to the Hypervisor.  This GFN can be backed by a secure-PFN,
> + *	or can be backed by a normal-PFN with contents encrypted.
> + *	The former is true when the GFN is paged-in into the
> + *	ultravisor. The latter is true when the GFN is paged-out
> + *	of the ultravisor.
> + *
> + * (b) Shared - The GFN is shared. The GFN is associated with a
> + *	a secure VM. The contents of the GFN is accessible to
> + *	Hypervisor. This GFN is backed by a normal-PFN and its
> + *	content is un-encrypted.
> + *
> + * (c) Normal - The GFN is a normal. The GFN is associated with
> + *	a normal VM. The contents of the GFN is accesible to
> + *	the Hypervisor. Its content is never encrypted.
> + *
> + * States of a VM.
> + * ---------------
> + *
> + * Normal VM:  A VM whose contents are always accessible to
> + *	the hypervisor.  All its GFNs are normal-GFNs.
> + *
> + * Secure VM: A VM whose contents are not accessible to the
> + *	hypervisor without the VM's consent.  Its GFNs are
> + *	either Shared-GFN or Secure-GFNs.
> + *
> + * Transient VM: A Normal VM that is transitioning to secure VM.
> + *	The transition starts on successful return of
> + *	H_SVM_INIT_START, and ends on successful return
> + *	of H_SVM_INIT_DONE. This transient VM, can have GFNs
> + *	in any of the three states; i.e Secure-GFN, Shared-GFN,
> + *	and Normal-GFN.	The VM never executes in this state
> + *	in supervisor-mode.
> + *
> + * Memory slot State.
> + * -----------------------------
> + *	The state of a memory slot mirrors the state of the
> + *	VM the memory slot is associated with.
> + *
> + * VM State transition.
> + * --------------------
> + *
> + *  A VM always starts in Normal Mode.
> + *
> + *  H_SVM_INIT_START moves the VM into transient state. During this
> + *  time the Ultravisor may request some of its GFNs to be shared or
> + *  secured. So its GFNs can be in one of the three GFN states.
> + *
> + *  H_SVM_INIT_DONE moves the VM entirely from transient state to
> + *  secure-state. At this point any left-over normal-GFNs are
> + *  transitioned to Secure-GFN.
> + *
> + *  H_SVM_INIT_ABORT moves the transient VM back to normal VM.
> + *  All its GFNs are moved to Normal-GFNs.
> + *
> + *  UV_TERMINATE transitions the secure-VM back to normal-VM. All
> + *  the secure-GFN and shared-GFNs are transitioned to normal-GFN.
> + *  Note: The contents of the normal-GFN is undefined at this point.
> + *
> + * GFN state implementation:
> + * -------------------------
> + *
> + * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
> + * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
> + * set, and contains the value of the secure-PFN.
> + * It is associated with a normal-PFN; also called mem_pfn, when
> + * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
> + * The value of the normal-PFN is not tracked.
> + *
> + * Shared GFN is associated with a normal-PFN. Its pfn[] has
> + * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
> + * is not tracked.
> + *
> + * Normal GFN is associated with normal-PFN. Its pfn[] has
> + * no flag set. The value of the normal-PFN is not tracked.
> + *
> + * Life cycle of a GFN
> + * --------------------
> + *
> + * --------------------------------------------------------------
> + * |        |     Share  |  Unshare | SVM       |H_SVM_INIT_DONE|
> + * |        |operation   |operation | abort/    |               |
> + * |        |            |          | terminate |               |
> + * -------------------------------------------------------------
> + * |        |            |          |           |               |
> + * | Secure |     Shared | Secure   |Normal     |Secure         |
> + * |        |            |          |           |               |
> + * | Shared |     Shared | Secure   |Normal     |Shared         |
> + * |        |            |          |           |               |
> + * | Normal |     Shared | Secure   |Normal     |Secure         |
> + * --------------------------------------------------------------
> + *
> + * Life cycle of a VM
> + * --------------------
> + *
> + * --------------------------------------------------------------------
> + * |         |  start    |  H_SVM_  |H_SVM_   |H_SVM_     |UV_SVM_    |
> + * |         |  VM       |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE  |
> + * |         |           |          |         |           |           |
> + * --------- ----------------------------------------------------------
> + * |         |           |          |         |           |           |
> + * | Normal  | Normal    | Transient|Error    |Error      |Normal     |
> + * |         |           |          |         |           |           |
> + * | Secure  |   Error   | Error    |Error    |Error      |Normal     |
> + * |         |           |          |         |           |           |
> + * |Transient|   N/A     | Error    |Secure   |Normal     |Normal     |
> + * --------------------------------------------------------------------
> + */
> +
> +#define KVMPPC_GFN_UVMEM_PFN	(1UL << 63)
> +#define KVMPPC_GFN_MEM_PFN	(1UL << 62)
> +#define KVMPPC_GFN_SHARED	(1UL << 61)
> +#define KVMPPC_GFN_SECURE	(KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)
> +#define KVMPPC_GFN_FLAG_MASK	(KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)
> +#define KVMPPC_GFN_PFN_MASK	(~KVMPPC_GFN_FLAG_MASK)
>   
>   struct kvmppc_uvmem_slot {
>   	struct list_head list;
> @@ -106,11 +226,11 @@ struct kvmppc_uvmem_slot {
>   	unsigned long base_pfn;
>   	unsigned long *pfns;
>   };
> -
>   struct kvmppc_uvmem_page_pvt {
>   	struct kvm *kvm;
>   	unsigned long gpa;
>   	bool skip_page_out;
> +	bool purge_gfn;
>   };
>   
>   int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
> @@ -154,8 +274,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
>   	mutex_unlock(&kvm->arch.uvmem_lock);
>   }
>   
> -static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
> -				    struct kvm *kvm)
> +static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
> +			unsigned long flag, unsigned long uvmem_pfn)
>   {
>   	struct kvmppc_uvmem_slot *p;
>   
> @@ -163,24 +283,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
>   		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
>   			unsigned long index = gfn - p->base_pfn;
>   
> -			p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
> +			if (flag == KVMPPC_GFN_UVMEM_PFN)
> +				p->pfns[index] = uvmem_pfn | flag;
> +			else
> +				p->pfns[index] = flag;

That's minor, but I'm wondering if that check is really needed since all the
calls to kvmppc_mark_gfn() with flags != KVMPPC_GFN_UVMEM_PFN are made with 
uvmem_pfn = 0.

>   			return;
>   		}
>   	}
>   }
>   
> -static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
> +/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */
> +static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
> +			unsigned long uvmem_pfn, struct kvm *kvm)
>   {
> -	struct kvmppc_uvmem_slot *p;
> +	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
> +}
>   
> -	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
> -		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
> -			p->pfns[gfn - p->base_pfn] = 0;
> -			return;
> -		}
> -	}
> +/* mark the GFN as secure-GFN associated with a memory-PFN. */
> +static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
> +{
> +	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
>   }
>   
> +/* mark the GFN as a shared GFN. */
> +static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
> +{
> +	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
> +}
> +
> +/* mark the GFN as a non-existent GFN. */
> +static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
> +{
> +	kvmppc_mark_gfn(gfn, kvm, 0, 0);
> +}
> +
> +/* return true, if the GFN is a secure-GFN backed by a secure-PFN */
>   static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
>   				    unsigned long *uvmem_pfn)
>   {
> @@ -190,10 +327,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
>   		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
>   			unsigned long index = gfn - p->base_pfn;
>   
> -			if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
> +			if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) {
>   				if (uvmem_pfn)
>   					*uvmem_pfn = p->pfns[index] &
> -						     ~KVMPPC_UVMEM_PFN;
> +						     KVMPPC_GFN_PFN_MASK;
>   				return true;
>   			} else
>   				return false;
> @@ -257,9 +394,13 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
>    * is HV side fault on these pages. Next we *get* these pages, forcing
>    * fault on them, do fault time migration to replace the device PTEs in
>    * QEMU page table with normal PTEs from newly allocated pages.
> + *
> + * if @purge_gfn is set, invalidate the GFN. GFN is not shared nor secure
> + * anymore.
>    */
>   void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
> -			     struct kvm *kvm, bool skip_page_out)
> +			     struct kvm *kvm, bool skip_page_out,
> +			     bool purge_gfn)
>   {
>   	int i;
>   	struct kvmppc_uvmem_page_pvt *pvt;
> @@ -270,14 +411,17 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
>   		struct page *uvmem_page;
>   
>   		mutex_lock(&kvm->arch.uvmem_lock);
> +
>   		if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
> +			if (purge_gfn)
> +				kvmppc_gfn_remove(gfn, kvm);
>   			mutex_unlock(&kvm->arch.uvmem_lock);
>   			continue;
>   		}
> -
>   		uvmem_page = pfn_to_page(uvmem_pfn);
>   		pvt = uvmem_page->zone_device_data;
>   		pvt->skip_page_out = skip_page_out;
> +		pvt->purge_gfn = purge_gfn;
>   		mutex_unlock(&kvm->arch.uvmem_lock);
>   
>   		pfn = gfn_to_pfn(kvm, gfn);
> @@ -305,7 +449,7 @@ unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
>   	srcu_idx = srcu_read_lock(&kvm->srcu);
>   
>   	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
> -		kvmppc_uvmem_drop_pages(memslot, kvm, false);
> +		kvmppc_uvmem_drop_pages(memslot, kvm, false, true);
>   
>   	srcu_read_unlock(&kvm->srcu, srcu_idx);
>   
> @@ -347,7 +491,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
>   		goto out_clear;
>   
>   	uvmem_pfn = bit + pfn_first;
> -	kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
> +	kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
>   
>   	pvt->gpa = gpa;
>   	pvt->kvm = kvm;
> @@ -454,6 +598,7 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
>   		uvmem_page = pfn_to_page(uvmem_pfn);
>   		pvt = uvmem_page->zone_device_data;
>   		pvt->skip_page_out = true;
> +		pvt->purge_gfn = false;
>   	}
>   
>   retry:
> @@ -467,12 +612,16 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
>   		uvmem_page = pfn_to_page(uvmem_pfn);
>   		pvt = uvmem_page->zone_device_data;
>   		pvt->skip_page_out = true;
> +		pvt->purge_gfn = false;
>   		kvm_release_pfn_clean(pfn);
>   		goto retry;
>   	}
>   
> -	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
> +	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
> +				page_shift)) {
> +		kvmppc_gfn_shared(gfn, kvm);
>   		ret = H_SUCCESS;
> +	}
>   	kvm_release_pfn_clean(pfn);
>   	mutex_unlock(&kvm->arch.uvmem_lock);
>   out:
> @@ -530,6 +679,7 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
>   	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
>   				&downgrade))
>   		ret = H_SUCCESS;
> +
>   out_unlock:
>   	mutex_unlock(&kvm->arch.uvmem_lock);
>   out:
> @@ -655,7 +805,10 @@ static void kvmppc_uvmem_page_free(struct page *page)
>   
>   	pvt = page->zone_device_data;
>   	page->zone_device_data = NULL;
> -	kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
> +	if (pvt->purge_gfn)
> +		kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
> +	else
> +		kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
>   	kfree(pvt);
>   }
>   
> 


^ permalink raw reply

* Re: [PATCH] powerpc/8xx: use pmd_off() to access a PMD entry in pte_update()
From: Michael Ellerman @ 2020-06-18 12:37 UTC (permalink / raw)
  To: Michael Ellerman, Andrew Morton, Mike Rapoport
  Cc: Christophe Leroy, linux-mm, linuxppc-dev, linux-kernel,
	Mike Rapoport
In-Reply-To: <20200615092229.23142-1-rppt@kernel.org>

On Mon, 15 Jun 2020 12:22:29 +0300, Mike Rapoport wrote:
> The pte_update() implementation for PPC_8xx unfolds page table from the PGD
> level to access a PMD entry. Since 8xx has only 2-level page table this can
> be simplified with pmd_off() shortcut.
> 
> Replace explicit unfolding with pmd_off() and drop defines of pgd_index()
> and pgd_offset() that are no longer needed.

Applied to powerpc/fixes.

[1/1] powerpc/8xx: use pmd_off() to access a PMD entry in pte_update()
      https://git.kernel.org/powerpc/c/687993ccf3b05070598b89fad97410b26d7bc9d2

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/64s: Fix KVM interrupt using wrong save area
From: Michael Ellerman @ 2020-06-18 12:37 UTC (permalink / raw)
  To: linuxppc-dev, Nicholas Piggin; +Cc: Christian Zigotzky
In-Reply-To: <20200615061247.1310763-1-npiggin@gmail.com>

On Mon, 15 Jun 2020 16:12:47 +1000, Nicholas Piggin wrote:
> The CTR register reload in the KVM interrupt path used the wrong save
> area for SLB (and NMI) interrupts.

Applied to powerpc/fixes.

[1/1] powerpc/64s: Fix KVM interrupt using wrong save area
      https://git.kernel.org/powerpc/c/0bdcfa182506526fbe4e088ff9ca86a31b81828d

cheers

^ permalink raw reply

* Re: [PATCH 1/2] powerpc/syscalls: Use the number when building SPU syscall table
From: Michael Ellerman @ 2020-06-18 12:37 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: linux-arch, linux-kernel, arnd
In-Reply-To: <20200616135617.2937252-1-mpe@ellerman.id.au>

On Tue, 16 Jun 2020 23:56:16 +1000, Michael Ellerman wrote:
> Currently the macro that inserts entries into the SPU syscall table
> doesn't actually use the "nr" (syscall number) parameter.
> 
> This does work, but it relies on the exact right number of syscall
> entries being emitted in order for the syscall numbers to line up with
> the array entries. If for example we had two entries with the same
> syscall number we wouldn't get an error, it would just cause all
> subsequent syscalls to be off by one in the spu_syscall_table.
> 
> [...]

Applied to powerpc/fixes.

[1/2] powerpc/syscalls: Use the number when building SPU syscall table
      https://git.kernel.org/powerpc/c/1497eea68624f6076bf3eaf66baec3771ea04045
[2/2] powerpc/syscalls: Split SPU-ness out of ABI
      https://git.kernel.org/powerpc/c/35e32a6cb5f694fda54a5f391917e4ceefa0fece

cheers

^ permalink raw reply

* Re: [PATCH 0/3] Fix build failure with v5.8-rc1
From: Michael Ellerman @ 2020-06-18 12:37 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Christophe Leroy,
	Michael Ellerman, Peter Zijlstra (Intel), Will Deacon,
	Andrew Morton
  Cc: linux-mm, linuxppc-dev, linux-kernel
In-Reply-To: <cover.1592225557.git.christophe.leroy@csgroup.eu>

On Mon, 15 Jun 2020 12:57:55 +0000 (UTC), Christophe Leroy wrote:
> Commit 2ab3a0a02905 ("READ_ONCE: Enforce atomicity for
> {READ,WRITE}_ONCE() memory accesses") leads to following build
> failure on powerpc 8xx.
> 
> To fix it, this small series introduces a new helper named ptep_get()
> to replace the direct access with READ_ONCE(). This new helper
> can be overridden by architectures.
> 
> [...]

Applied to powerpc/fixes.

[1/3] mm/gup: Use huge_ptep_get() in gup_hugepte()
      https://git.kernel.org/powerpc/c/01a80ec6495f9e43f61b3231f3b283ca050a800e
[2/3] mm: Allow arches to provide ptep_get()
      https://git.kernel.org/powerpc/c/f7583fd6bdcc4d0b43f68fb81ebfae9669ee9338
[3/3] powerpc/8xx: Provide ptep_get() with 16k pages
      https://git.kernel.org/powerpc/c/b55129f97aeefd265314e12d98935330e011a14a

cheers

^ permalink raw reply

* Re: [PATCH v2 1/4] powerpc/instruction_dump: Fix kernel crash with show_instructions
From: Michael Ellerman @ 2020-06-18 12:37 UTC (permalink / raw)
  To: linuxppc-dev, mpe, Aneesh Kumar K.V
In-Reply-To: <20200524093822.423487-1-aneesh.kumar@linux.ibm.com>

On Sun, 24 May 2020 15:08:19 +0530, Aneesh Kumar K.V wrote:
> With Hard Lockup watchdog, we can hit a BUG() if we take a watchdog
> interrupt when in OPAL mode. This happens in show_instructions()
> where the kernel takes the watchdog NMI IPI with MSR_IR == 0.
> With that show_instructions() updates the variable pc in the loop
> and the second iterations will result in BUG().
> 
> We hit the BUG_ON due to the below check in __va()
> 
> [...]

Patch 1 applied to powerpc/fixes.

[1/4] powerpc: Fix kernel crash in show_instructions() w/DEBUG_VIRTUAL
      https://git.kernel.org/powerpc/c/a6e2c226c3d51fd93636320e47cabc8a8f0824c5

cheers

^ permalink raw reply

* [PATCH 2/2] powerpc/hv-24x7: Add sysfs files inside hv-24x7 device to show cpumask
From: Kajol Jain @ 2020-06-18 12:27 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: nathanl, kjain, suka, maddy, anju
In-Reply-To: <20200618122713.9030-1-kjain@linux.ibm.com>

Patch here adds a cpumask attr to hv_24x7 pmu along with ABI documentation.

command:# cat /sys/devices/hv_24x7/cpumask
0

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 .../sysfs-bus-event_source-devices-hv_24x7    |  6 ++++
 arch/powerpc/perf/hv-24x7.c                   | 31 ++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
index e8698afcd952..281e7b367733 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
@@ -43,6 +43,12 @@ Description:	read only
 		This sysfs interface exposes the number of cores per chip
 		present in the system.
 
+What:		/sys/devices/hv_24x7/cpumask
+Date:		June 2020
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	read only
+		This sysfs file exposes cpumask.
+
 What:		/sys/bus/event_source/devices/hv_24x7/event_descs/<event-name>
 Date:		February 2014
 Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index fdc4ae155d60..03d870a9fc36 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -448,6 +448,12 @@ static ssize_t device_show_string(struct device *dev,
 	return sprintf(buf, "%s\n", (char *)d->var);
 }
 
+static ssize_t cpumask_get_attr(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask);
+}
+
 static ssize_t sockets_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
@@ -1116,6 +1122,17 @@ static DEVICE_ATTR_RO(sockets);
 static DEVICE_ATTR_RO(chipspersocket);
 static DEVICE_ATTR_RO(coresperchip);
 
+static DEVICE_ATTR(cpumask, S_IRUGO, cpumask_get_attr, NULL);
+
+static struct attribute *cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+	.attrs = cpumask_attrs,
+};
+
 static struct bin_attribute *if_bin_attrs[] = {
 	&bin_attr_catalog,
 	NULL,
@@ -1143,6 +1160,11 @@ static const struct attribute_group *attr_groups[] = {
 	&event_desc_group,
 	&event_long_desc_group,
 	&if_group,
+	/*
+	 * This NULL is a placeholder for the cpumask attr which will update
+	 * only if cpuhotplug registration is successful
+	 */
+	NULL,
 	NULL,
 };
 
@@ -1727,8 +1749,15 @@ static int hv_24x7_init(void)
 
 	/* init cpuhotplug */
 	r = hv_24x7_cpu_hotplug_init();
-	if (r)
+	if (r) {
 		pr_err("hv_24x7: CPU hotplug init failed\n");
+	} else {
+		/*
+		 * Cpu hotplug init is successful, add the
+		 * cpumask file as part of pmu attr group
+		 */
+		attr_groups[5] = &cpumask_attr_group;
+	}
 
 	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
 	if (r)
-- 
2.18.2


^ permalink raw reply related

* [PATCH 1/2] powerpc/perf/hv-24x7: Add cpu hotplug support
From: Kajol Jain @ 2020-06-18 12:27 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: nathanl, kjain, suka, maddy, anju
In-Reply-To: <20200618122713.9030-1-kjain@linux.ibm.com>

Patch here adds cpu hotplug functions to hv_24x7 pmu.
A new cpuhp_state "CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE" enum
is added.

The online function updates the cpumask only if it is empty,
as the primary intention of adding hotplug support
is to designate a CPU to make the HCALL that collects the
count data.

The offline function tests and clears the corresponding cpu in the cpumask
and updates the cpumask to any other active cpu.

With this patchset, perf tool side does not need "-C <cpu>"
to be added.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/perf/hv-24x7.c | 45 +++++++++++++++++++++++++++++++++++++
 include/linux/cpuhotplug.h  |  1 +
 2 files changed, 46 insertions(+)

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index db213eb7cb02..fdc4ae155d60 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -31,6 +31,8 @@ static int interface_version;
 /* Whether we have to aggregate result data for some domains. */
 static bool aggregate_result_elements;
 
+static cpumask_t hv_24x7_cpumask;
+
 static bool domain_is_valid(unsigned domain)
 {
 	switch (domain) {
@@ -1641,6 +1643,44 @@ static struct pmu h_24x7_pmu = {
 	.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
 };
 
+static int ppc_hv_24x7_cpu_online(unsigned int cpu)
+{
+	/* Make this CPU the designated target for counter collection */
+	if (cpumask_empty(&hv_24x7_cpumask))
+		cpumask_set_cpu(cpu, &hv_24x7_cpumask);
+
+	return 0;
+}
+
+static int ppc_hv_24x7_cpu_offline(unsigned int cpu)
+{
+	int target = -1;
+
+	/* Check if exiting cpu is used for collecting 24x7 events */
+	if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask))
+		return 0;
+
+	/* Find a new cpu to collect 24x7 events */
+	target = cpumask_any_but(cpu_active_mask, cpu);
+
+	if (target < 0 || target >= nr_cpu_ids)
+		return -1;
+
+	/* Migrate 24x7 events to the new target */
+	cpumask_set_cpu(target, &hv_24x7_cpumask);
+	perf_pmu_migrate_context(&h_24x7_pmu, cpu, target);
+
+	return 0;
+}
+
+static int hv_24x7_cpu_hotplug_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
+			  "perf/powerpc/hv_24x7:online",
+			  ppc_hv_24x7_cpu_online,
+			  ppc_hv_24x7_cpu_offline);
+}
+
 static int hv_24x7_init(void)
 {
 	int r;
@@ -1685,6 +1725,11 @@ static int hv_24x7_init(void)
 	if (r)
 		return r;
 
+	/* init cpuhotplug */
+	r = hv_24x7_cpu_hotplug_init();
+	if (r)
+		pr_err("hv_24x7: CPU hotplug init failed\n");
+
 	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
 	if (r)
 		return r;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8377afef8806..16ed8f6f8774 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -180,6 +180,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+	CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
 	CPUHP_AP_WATCHDOG_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
-- 
2.18.2


^ permalink raw reply related

* [PATCH 0/2] Add cpu hotplug support for powerpc/perf/hv-24x7
From: Kajol Jain @ 2020-06-18 12:27 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: nathanl, kjain, suka, maddy, anju

This patchset adds cpu hotplug support for the hv_24x7 driver by adding
online/offline cpu hotplug functions. It also adds a sysfs file
"cpumask" to expose the current online cpu that can be used for
hv_24x7 event counts.

Kajol Jain (2):
  powerpc/perf/hv-24x7: Add cpu hotplug support
  powerpc/hv-24x7: Add sysfs files inside hv-24x7 device to show cpumask

 .../sysfs-bus-event_source-devices-hv_24x7    |  6 ++
 arch/powerpc/perf/hv-24x7.c                   | 74 +++++++++++++++++++
 include/linux/cpuhotplug.h                    |  1 +
 3 files changed, 81 insertions(+)

-- 
2.18.2


^ permalink raw reply

* [PATCH] powerpc/8xx: Modify ptep_get()
From: Christophe Leroy @ 2020-06-18 12:07 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Will Deacon, Andrew Morton, Peter Zijlstra (Intel)
  Cc: linux-mm, linuxppc-dev, linux-kernel

Move ptep_get() close to pte_update(), in an ifdef section already
dedicated to powerpc 8xx. This section contains explanation about
the layout of page table entries.

Also modify it to return the pte value replicated in all four entries
instead of padding the remaining entries with zeroes.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/nohash/32/pgtable.h | 22 +++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index b0afbdd07740..b9e134d0f03a 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -249,6 +249,18 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
 
 	return old;
 }
+
+#ifdef CONFIG_PPC_16K_PAGES
+#define __HAVE_ARCH_PTEP_GET
+static inline pte_t ptep_get(pte_t *ptep)
+{
+	pte_basic_t val = READ_ONCE(ptep->pte);
+	pte_t pte = {val, val, val, val};
+
+	return pte;
+}
+#endif /* CONFIG_PPC_16K_PAGES */
+
 #else
 static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
 				     unsigned long clr, unsigned long set, int huge)
@@ -284,16 +296,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	return __pte(pte_update(mm, addr, ptep, ~0, 0, 0));
 }
 
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
-#define __HAVE_ARCH_PTEP_GET
-static inline pte_t ptep_get(pte_t *ptep)
-{
-	pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0};
-
-	return pte;
-}
-#endif
-
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
-- 
2.25.0


^ permalink raw reply related

* [PATCH] ASoC: fsl_spdif: Add pm runtime function
From: Shengjiu Wang @ 2020-06-18 11:55 UTC (permalink / raw)
  To: timur, nicoleotsuka, Xiubo.Lee, festevam, broonie, perex, tiwai,
	alsa-devel
  Cc: linuxppc-dev, linux-kernel

Add pm runtime support and move clock handling there.
Close the clocks at suspend to reduce the power consumption.

fsl_spdif_suspend is replaced by pm_runtime_force_suspend.
fsl_spdif_resume is replaced by pm_runtime_force_resume.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
 sound/soc/fsl/fsl_spdif.c | 113 ++++++++++++++++++++++----------------
 1 file changed, 67 insertions(+), 46 deletions(-)

diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c
index 5bc0e4729341..46719fd2f1ec 100644
--- a/sound/soc/fsl/fsl_spdif.c
+++ b/sound/soc/fsl/fsl_spdif.c
@@ -16,6 +16,7 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/regmap.h>
+#include <linux/pm_runtime.h>
 
 #include <sound/asoundef.h>
 #include <sound/dmaengine_pcm.h>
@@ -495,25 +496,10 @@ static int fsl_spdif_startup(struct snd_pcm_substream *substream,
 	struct platform_device *pdev = spdif_priv->pdev;
 	struct regmap *regmap = spdif_priv->regmap;
 	u32 scr, mask;
-	int i;
 	int ret;
 
 	/* Reset module and interrupts only for first initialization */
 	if (!snd_soc_dai_active(cpu_dai)) {
-		ret = clk_prepare_enable(spdif_priv->coreclk);
-		if (ret) {
-			dev_err(&pdev->dev, "failed to enable core clock\n");
-			return ret;
-		}
-
-		if (!IS_ERR(spdif_priv->spbaclk)) {
-			ret = clk_prepare_enable(spdif_priv->spbaclk);
-			if (ret) {
-				dev_err(&pdev->dev, "failed to enable spba clock\n");
-				goto err_spbaclk;
-			}
-		}
-
 		ret = spdif_softreset(spdif_priv);
 		if (ret) {
 			dev_err(&pdev->dev, "failed to soft reset\n");
@@ -531,18 +517,10 @@ static int fsl_spdif_startup(struct snd_pcm_substream *substream,
 		mask = SCR_TXFIFO_AUTOSYNC_MASK | SCR_TXFIFO_CTRL_MASK |
 			SCR_TXSEL_MASK | SCR_USRC_SEL_MASK |
 			SCR_TXFIFO_FSEL_MASK;
-		for (i = 0; i < SPDIF_TXRATE_MAX; i++) {
-			ret = clk_prepare_enable(spdif_priv->txclk[i]);
-			if (ret)
-				goto disable_txclk;
-		}
 	} else {
 		scr = SCR_RXFIFO_FSEL_IF8 | SCR_RXFIFO_AUTOSYNC;
 		mask = SCR_RXFIFO_FSEL_MASK | SCR_RXFIFO_AUTOSYNC_MASK|
 			SCR_RXFIFO_CTL_MASK | SCR_RXFIFO_OFF_MASK;
-		ret = clk_prepare_enable(spdif_priv->rxclk);
-		if (ret)
-			goto err;
 	}
 	regmap_update_bits(regmap, REG_SPDIF_SCR, mask, scr);
 
@@ -551,15 +529,7 @@ static int fsl_spdif_startup(struct snd_pcm_substream *substream,
 
 	return 0;
 
-disable_txclk:
-	for (i--; i >= 0; i--)
-		clk_disable_unprepare(spdif_priv->txclk[i]);
 err:
-	if (!IS_ERR(spdif_priv->spbaclk))
-		clk_disable_unprepare(spdif_priv->spbaclk);
-err_spbaclk:
-	clk_disable_unprepare(spdif_priv->coreclk);
-
 	return ret;
 }
 
@@ -569,20 +539,17 @@ static void fsl_spdif_shutdown(struct snd_pcm_substream *substream,
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct fsl_spdif_priv *spdif_priv = snd_soc_dai_get_drvdata(asoc_rtd_to_cpu(rtd, 0));
 	struct regmap *regmap = spdif_priv->regmap;
-	u32 scr, mask, i;
+	u32 scr, mask;
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		scr = 0;
 		mask = SCR_TXFIFO_AUTOSYNC_MASK | SCR_TXFIFO_CTRL_MASK |
 			SCR_TXSEL_MASK | SCR_USRC_SEL_MASK |
 			SCR_TXFIFO_FSEL_MASK;
-		for (i = 0; i < SPDIF_TXRATE_MAX; i++)
-			clk_disable_unprepare(spdif_priv->txclk[i]);
 	} else {
 		scr = SCR_RXFIFO_OFF | SCR_RXFIFO_CTL_ZERO;
 		mask = SCR_RXFIFO_FSEL_MASK | SCR_RXFIFO_AUTOSYNC_MASK|
 			SCR_RXFIFO_CTL_MASK | SCR_RXFIFO_OFF_MASK;
-		clk_disable_unprepare(spdif_priv->rxclk);
 	}
 	regmap_update_bits(regmap, REG_SPDIF_SCR, mask, scr);
 
@@ -591,9 +558,6 @@ static void fsl_spdif_shutdown(struct snd_pcm_substream *substream,
 		spdif_intr_status_clear(spdif_priv);
 		regmap_update_bits(regmap, REG_SPDIF_SCR,
 				SCR_LOW_POWER, SCR_LOW_POWER);
-		if (!IS_ERR(spdif_priv->spbaclk))
-			clk_disable_unprepare(spdif_priv->spbaclk);
-		clk_disable_unprepare(spdif_priv->coreclk);
 	}
 }
 
@@ -1350,6 +1314,8 @@ static int fsl_spdif_probe(struct platform_device *pdev)
 
 	/* Register with ASoC */
 	dev_set_drvdata(&pdev->dev, spdif_priv);
+	pm_runtime_enable(&pdev->dev);
+	regcache_cache_only(spdif_priv->regmap, true);
 
 	ret = devm_snd_soc_register_component(&pdev->dev, &fsl_spdif_component,
 					      &spdif_priv->cpu_dai_drv, 1);
@@ -1365,36 +1331,91 @@ static int fsl_spdif_probe(struct platform_device *pdev)
 	return ret;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int fsl_spdif_suspend(struct device *dev)
+#ifdef CONFIG_PM
+static int fsl_spdif_runtime_suspend(struct device *dev)
 {
 	struct fsl_spdif_priv *spdif_priv = dev_get_drvdata(dev);
+	int i;
 
 	regmap_read(spdif_priv->regmap, REG_SPDIF_SRPC,
 			&spdif_priv->regcache_srpc);
-
 	regcache_cache_only(spdif_priv->regmap, true);
-	regcache_mark_dirty(spdif_priv->regmap);
+
+	clk_disable_unprepare(spdif_priv->rxclk);
+
+	for (i = 0; i < SPDIF_TXRATE_MAX; i++)
+		clk_disable_unprepare(spdif_priv->txclk[i]);
+
+	if (!IS_ERR(spdif_priv->spbaclk))
+		clk_disable_unprepare(spdif_priv->spbaclk);
+	clk_disable_unprepare(spdif_priv->coreclk);
 
 	return 0;
 }
 
-static int fsl_spdif_resume(struct device *dev)
+static int fsl_spdif_runtime_resume(struct device *dev)
 {
 	struct fsl_spdif_priv *spdif_priv = dev_get_drvdata(dev);
+	int ret;
+	int i;
+
+	ret = clk_prepare_enable(spdif_priv->coreclk);
+	if (ret) {
+		dev_err(dev, "failed to enable core clock\n");
+		return ret;
+	}
+
+	if (!IS_ERR(spdif_priv->spbaclk)) {
+		ret = clk_prepare_enable(spdif_priv->spbaclk);
+		if (ret) {
+			dev_err(dev, "failed to enable spba clock\n");
+			goto disable_core_clk;
+		}
+	}
+
+	for (i = 0; i < SPDIF_TXRATE_MAX; i++) {
+		ret = clk_prepare_enable(spdif_priv->txclk[i]);
+		if (ret)
+			goto disable_spba_clk;
+	}
+
+	ret = clk_prepare_enable(spdif_priv->rxclk);
+	if (ret)
+		goto disable_tx_clk;
 
 	regcache_cache_only(spdif_priv->regmap, false);
+	regcache_mark_dirty(spdif_priv->regmap);
 
 	regmap_update_bits(spdif_priv->regmap, REG_SPDIF_SRPC,
 			SRPC_CLKSRC_SEL_MASK | SRPC_GAINSEL_MASK,
 			spdif_priv->regcache_srpc);
 
-	return regcache_sync(spdif_priv->regmap);
+	ret = regcache_sync(spdif_priv->regmap);
+	if (ret)
+		goto disable_rx_clk;
+
+	return 0;
+
+disable_rx_clk:
+	clk_disable_unprepare(spdif_priv->rxclk);
+disable_tx_clk:
+disable_spba_clk:
+	for (i--; i >= 0; i--)
+		clk_disable_unprepare(spdif_priv->txclk[i]);
+	if (!IS_ERR(spdif_priv->spbaclk))
+		clk_disable_unprepare(spdif_priv->spbaclk);
+disable_core_clk:
+	clk_disable_unprepare(spdif_priv->coreclk);
+
+	return ret;
 }
-#endif /* CONFIG_PM_SLEEP */
+#endif
 
 static const struct dev_pm_ops fsl_spdif_pm = {
-	SET_SYSTEM_SLEEP_PM_OPS(fsl_spdif_suspend, fsl_spdif_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+	SET_RUNTIME_PM_OPS(fsl_spdif_runtime_suspend, fsl_spdif_runtime_resume,
+			   NULL)
 };
 
 static const struct of_device_id fsl_spdif_dt_ids[] = {
-- 
2.21.0


^ permalink raw reply related

* [PATCH v2 4/4] KVM: PPC: Book3S HV: migrate hot plugged memory
From: Ram Pai @ 2020-06-18  9:19 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: ldufour, linuxram, cclaudio, bharata, sathnaga, aneesh.kumar,
	sukadev, bauerman, david
In-Reply-To: <1592471945-24786-1-git-send-email-linuxram@us.ibm.com>

From: Laurent Dufour <ldufour@linux.ibm.com>

When a memory slot is hot plugged to a SVM, PFNs associated with the
GFNs in that slot must be migrated to the secure-PFNs, aka device-PFNs.

kvmppc_uv_migrate_mem_slot() is called to accomplish this. UV_PAGE_IN
ucall is skipped, since the ultravisor does not trust the content of
those pages and hence ignores it.

Signed-off-by: Ram Pai <linuxram@us.ibm.com>
	[resolved conflicts, and modified the commit log]
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_uvmem.h |  2 ++
 arch/powerpc/kvm/book3s_hv.c                | 10 ++++++----
 arch/powerpc/kvm/book3s_hv_uvmem.c          |  2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
index f0c5708..05ae789 100644
--- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -23,6 +23,8 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
 void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
 			     struct kvm *kvm, bool skip_page_out,
 			     bool purge_gfn);
+int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+				const struct kvm_memory_slot *memslot);
 #else
 static inline int kvmppc_uvmem_init(void)
 {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6cf80e5..bf7324d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4531,10 +4531,12 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
 	case KVM_MR_CREATE:
 		if (kvmppc_uvmem_slot_init(kvm, new))
 			return;
-		uv_register_mem_slot(kvm->arch.lpid,
-				     new->base_gfn << PAGE_SHIFT,
-				     new->npages * PAGE_SIZE,
-				     0, new->id);
+		if (uv_register_mem_slot(kvm->arch.lpid,
+					 new->base_gfn << PAGE_SHIFT,
+					 new->npages * PAGE_SIZE,
+					 0, new->id))
+			return;
+		kvmppc_uv_migrate_mem_slot(kvm, new);
 		break;
 	case KVM_MR_DELETE:
 		uv_unregister_mem_slot(kvm->arch.lpid, old->id);
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 78f8580..4d8f5bc 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -451,7 +451,7 @@ static int kvmppc_svm_migrate_page(struct vm_area_struct *vma,
 	return ret;
 }
 
-static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
 		const struct kvm_memory_slot *memslot)
 {
 	unsigned long gfn = memslot->base_gfn;
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v2 3/4] KVM: PPC: Book3S HV: migrate remaining normal-GFNs to secure-GFNs in H_SVM_INIT_DONE
From: Ram Pai @ 2020-06-18  9:19 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: ldufour, linuxram, cclaudio, bharata, sathnaga, aneesh.kumar,
	sukadev, bauerman, david
In-Reply-To: <1592471945-24786-1-git-send-email-linuxram@us.ibm.com>

H_SVM_INIT_DONE incorrectly assumes that the Ultravisor has explicitly
called H_SVM_PAGE_IN for all secure pages. These GFNs continue to be
normal GFNs associated with normal PFNs; when in fact, these GFNs should
have been secure GFNs, associated with device PFNs.

Move all the PFNs associated with the SVM's GFNs to secure-PFNs in
H_SVM_INIT_DONE. Skip the GFNs that are already Paged-in or Shared
through H_SVM_PAGE_IN, or Paged-in followed by a Paged-out through
UV_PAGE_OUT.

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Bharata B Rao <bharata@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Claudio Carvalho <cclaudio@linux.ibm.com>
Cc: kvm-ppc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
 Documentation/powerpc/ultravisor.rst |   2 +
 arch/powerpc/kvm/book3s_hv_uvmem.c   | 235 +++++++++++++++++++++++++----------
 2 files changed, 171 insertions(+), 66 deletions(-)

diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst
index 363736d..3bc8957 100644
--- a/Documentation/powerpc/ultravisor.rst
+++ b/Documentation/powerpc/ultravisor.rst
@@ -933,6 +933,8 @@ Return values
 	* H_UNSUPPORTED		if called from the wrong context (e.g.
 				from an SVM or before an H_SVM_INIT_START
 				hypercall).
+	* H_STATE		if the hypervisor could not successfully
+                                transition the VM to Secure VM.
 
 Description
 ~~~~~~~~~~~
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 666d1bb..78f8580 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -339,6 +339,21 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
 	return false;
 }
 
+/* return true, if the GFN is a shared-GFN, or a secure-GFN */
+bool kvmppc_gfn_has_transitioned(unsigned long gfn, struct kvm *kvm)
+{
+	struct kvmppc_uvmem_slot *p;
+
+	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+			unsigned long index = gfn - p->base_pfn;
+
+			return (p->pfns[index] & KVMPPC_GFN_FLAG_MASK);
+		}
+	}
+	return false;
+}
+
 unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
@@ -377,14 +392,152 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 	return ret;
 }
 
+static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm);
+
+/*
+ * Alloc a PFN from private device memory pool. If @pagein is true,
+ * copy page from normal memory to secure memory using UV_PAGE_IN uvcall.
+ */
+static int kvmppc_svm_migrate_page(struct vm_area_struct *vma,
+		unsigned long start,
+		unsigned long end, unsigned long gpa, struct kvm *kvm,
+		unsigned long page_shift,
+		bool pagein)
+{
+	unsigned long src_pfn, dst_pfn = 0;
+	struct migrate_vma mig;
+	struct page *dpage;
+	struct page *spage;
+	unsigned long pfn;
+	int ret = 0;
+
+	memset(&mig, 0, sizeof(mig));
+	mig.vma = vma;
+	mig.start = start;
+	mig.end = end;
+	mig.src = &src_pfn;
+	mig.dst = &dst_pfn;
+
+	ret = migrate_vma_setup(&mig);
+	if (ret)
+		return ret;
+
+	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
+		ret = -1;
+		goto out_finalize;
+	}
+
+	dpage = kvmppc_uvmem_get_page(gpa, kvm);
+	if (!dpage) {
+		ret = -1;
+		goto out_finalize;
+	}
+
+	if (pagein) {
+		pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+		spage = migrate_pfn_to_page(*mig.src);
+		if (spage) {
+			ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
+					gpa, 0, page_shift);
+			if (ret)
+				goto out_finalize;
+		}
+	}
+
+	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+	migrate_vma_pages(&mig);
+out_finalize:
+	migrate_vma_finalize(&mig);
+	return ret;
+}
+
+static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+		const struct kvm_memory_slot *memslot)
+{
+	unsigned long gfn = memslot->base_gfn;
+	unsigned long end;
+	bool downgrade = false;
+	struct vm_area_struct *vma;
+	int i, ret = 0;
+	unsigned long start = gfn_to_hva(kvm, gfn);
+
+	if (kvm_is_error_hva(start))
+		return H_STATE;
+
+	end = start + (memslot->npages << PAGE_SHIFT);
+
+	down_write(&kvm->mm->mmap_sem);
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	vma = find_vma_intersection(kvm->mm, start, end);
+	if (!vma || vma->vm_start > start || vma->vm_end < end) {
+		ret = H_STATE;
+		goto out_unlock;
+	}
+
+	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+			  MADV_UNMERGEABLE, &vma->vm_flags);
+	downgrade_write(&kvm->mm->mmap_sem);
+	downgrade = true;
+	if (ret) {
+		ret = H_STATE;
+		goto out_unlock;
+	}
+
+	for (i = 0; i < memslot->npages; i++, ++gfn) {
+		/*
+		 * skip GFNs that have already tranistioned.
+		 * paged-in GFNs, shared GFNs, paged-in GFNs
+		 * that were later paged-out.
+		 */
+		if (kvmppc_gfn_has_transitioned(gfn, kvm))
+			continue;
+
+		start = gfn_to_hva(kvm, gfn);
+		end = start + (1UL << PAGE_SHIFT);
+		ret = kvmppc_svm_migrate_page(vma, start, end,
+			(gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
+
+		if (ret)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&kvm->arch.uvmem_lock);
+	if (downgrade)
+		up_read(&kvm->mm->mmap_sem);
+	else
+		up_write(&kvm->mm->mmap_sem);
+	return ret;
+}
+
 unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
 {
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int srcu_idx;
+	long ret = H_SUCCESS;
+
 	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
 		return H_UNSUPPORTED;
 
+	/* migrate any unmoved normal pfn to device pfns*/
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots) {
+		ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
+		if (ret) {
+			ret = H_STATE;
+			goto out;
+		}
+	}
+
 	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
 	pr_info("LPID %d went secure\n", kvm->arch.lpid);
-	return H_SUCCESS;
+
+out:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
 }
 
 /*
@@ -510,68 +663,6 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
 }
 
 /*
- * Alloc a PFN from private device memory pool and copy page from normal
- * memory to secure memory using UV_PAGE_IN uvcall.
- */
-static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
-		   unsigned long end, unsigned long gpa, struct kvm *kvm,
-		   unsigned long page_shift, bool *downgrade)
-{
-	unsigned long src_pfn, dst_pfn = 0;
-	struct migrate_vma mig;
-	struct page *spage;
-	unsigned long pfn;
-	struct page *dpage;
-	int ret = 0;
-
-	memset(&mig, 0, sizeof(mig));
-	mig.vma = vma;
-	mig.start = start;
-	mig.end = end;
-	mig.src = &src_pfn;
-	mig.dst = &dst_pfn;
-
-	/*
-	 * We come here with mmap_sem write lock held just for
-	 * ksm_madvise(), otherwise we only need read mmap_sem.
-	 * Hence downgrade to read lock once ksm_madvise() is done.
-	 */
-	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
-			  MADV_UNMERGEABLE, &vma->vm_flags);
-	downgrade_write(&kvm->mm->mmap_sem);
-	*downgrade = true;
-	if (ret)
-		return ret;
-
-	ret = migrate_vma_setup(&mig);
-	if (ret)
-		return ret;
-
-	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
-		ret = -1;
-		goto out_finalize;
-	}
-
-	dpage = kvmppc_uvmem_get_page(gpa, kvm);
-	if (!dpage) {
-		ret = -1;
-		goto out_finalize;
-	}
-
-	pfn = *mig.src >> MIGRATE_PFN_SHIFT;
-	spage = migrate_pfn_to_page(*mig.src);
-	if (spage)
-		uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
-			   page_shift);
-
-	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
-	migrate_vma_pages(&mig);
-out_finalize:
-	migrate_vma_finalize(&mig);
-	return ret;
-}
-
-/*
  * Shares the page with HV, thus making it a normal page.
  *
  * - If the page is already secure, then provision a new page and share
@@ -676,9 +767,21 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
 	if (!vma || vma->vm_start > start || vma->vm_end < end)
 		goto out_unlock;
 
-	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
-				&downgrade))
-		ret = H_SUCCESS;
+	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+			  MADV_UNMERGEABLE, &vma->vm_flags);
+	downgrade_write(&kvm->mm->mmap_sem);
+	downgrade = true;
+	if (ret) {
+		ret = H_PARAMETER;
+		goto out_unlock;
+	}
+
+	ret = H_PARAMETER;
+	if (kvmppc_svm_migrate_page(vma, start, end, gpa, kvm, page_shift,
+				true))
+		goto out_unlock;
+
+	ret = H_SUCCESS;
 
 out_unlock:
 	mutex_unlock(&kvm->arch.uvmem_lock);
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v2 2/4] KVM: PPC: Book3S HV: track the state GFNs associated with secure VMs
From: Ram Pai @ 2020-06-18  9:19 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: ldufour, linuxram, cclaudio, bharata, sathnaga, aneesh.kumar,
	sukadev, bauerman, david
In-Reply-To: <1592471945-24786-1-git-send-email-linuxram@us.ibm.com>

During the life of SVM, its GFNs transition through normal, secure and
shared states. Since the kernel does not track GFNs that are shared, it
is not possible to disambiguate a shared GFN from a GFN whose PFN has
not yet been migrated to a secure-PFN. Also it is not possible to
disambiguate a secure-GFN from a GFN whose PFN has been paged-out from
the ultravisor.

The ability to identify the state of a GFN is needed to skip migration of its
PFN to secure-PFN during ESM transition.

The code is re-organized to track the states of a GFN as explained
below.

************************************************************************
 1. States of a GFN
    ---------------
 The GFN can be in one of the following states.

 (a) Secure - The GFN is secure. The GFN is associated with
 	a Secure VM, the contents of the GFN is not accessible
 	to the Hypervisor.  This GFN can be backed by a secure-PFN,
 	or can be backed by a normal-PFN with contents encrypted.
 	The former is true when the GFN is paged-in into the
 	ultravisor. The latter is true when the GFN is paged-out
 	of the ultravisor.

 (b) Shared - The GFN is shared. The GFN is associated with
 	a secure VM. The contents of the GFN is accessible to
 	Hypervisor. This GFN is backed by a normal-PFN and its
 	content is un-encrypted.

 (c) Normal - The GFN is normal. The GFN is associated with
 	a normal VM. The contents of the GFN is accessible to
 	the Hypervisor. Its content is never encrypted.

 2. States of a VM.
    ---------------

 (a) Normal VM:  A VM whose contents are always accessible to
 	the hypervisor.  All its GFNs are normal-GFNs.

 (b) Secure VM: A VM whose contents are not accessible to the
 	hypervisor without the VM's consent.  Its GFNs are
 	either Shared-GFN or Secure-GFNs.

 (c) Transient VM: A Normal VM that is transitioning to secure VM.
 	The transition starts on successful return of
 	H_SVM_INIT_START, and ends on successful return
 	of H_SVM_INIT_DONE. This transient VM can have GFNs
 	in any of the three states; i.e. Secure-GFN, Shared-GFN,
 	and Normal-GFN.	The VM never executes in this state
 	in supervisor-mode.

 3. Memory slot State.
    ------------------
  	The state of a memory slot mirrors the state of the
  	VM the memory slot is associated with.

 4. VM State transition.
    --------------------

  A VM always starts in Normal Mode.

  H_SVM_INIT_START moves the VM into transient state. During this
  time the Ultravisor may request some of its GFNs to be shared or
  secured. So its GFNs can be in one of the three GFN states.

  H_SVM_INIT_DONE moves the VM entirely from transient state to
  secure-state. At this point any left-over normal-GFNs are
  transitioned to Secure-GFN.

  H_SVM_INIT_ABORT moves the transient VM back to normal VM.
  All its GFNs are moved to Normal-GFNs.

  UV_TERMINATE transitions the secure-VM back to normal-VM. All
  the secure-GFNs and shared-GFNs are transitioned to normal-GFNs.
  Note: The contents of the normal-GFN is undefined at this point.

 5. GFN state implementation:
    -------------------------

 Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
 when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
 set, and contains the value of the secure-PFN.
 It is associated with a normal-PFN; also called mem_pfn, when
 the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
 The value of the normal-PFN is not tracked.

 Shared GFN is associated with a normal-PFN. Its pfn[] has
 KVMPPC_GFN_SHARED flag set. The value of the normal-PFN
 is not tracked.

 Normal GFN is associated with normal-PFN. Its pfn[] has
 no flag set. The value of the normal-PFN is not tracked.

 6. Life cycle of a GFN
    --------------------
 --------------------------------------------------------------
 |        |     Share  |  Unshare | SVM       |H_SVM_INIT_DONE|
 |        |operation   |operation | abort/    |               |
 |        |            |          | terminate |               |
 -------------------------------------------------------------
 |        |            |          |           |               |
 | Secure |     Shared | Secure   |Normal     |Secure         |
 |        |            |          |           |               |
 | Shared |     Shared | Secure   |Normal     |Shared         |
 |        |            |          |           |               |
 | Normal |     Shared | Secure   |Normal     |Secure         |
 --------------------------------------------------------------

 7. Life cycle of a VM
    --------------------
 --------------------------------------------------------------------
 |         |  start    |  H_SVM_  |H_SVM_   |H_SVM_     |UV_SVM_    |
 |         |  VM       |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE  |
 |         |           |          |         |           |           |
 --------- ----------------------------------------------------------
 |         |           |          |         |           |           |
 | Normal  | Normal    | Transient|Error    |Error      |Normal     |
 |         |           |          |         |           |           |
 | Secure  |   Error   | Error    |Error    |Error      |Normal     |
 |         |           |          |         |           |           |
 |Transient|   N/A     | Error    |Secure   |Normal     |Normal     |
 --------------------------------------------------------------------

************************************************************************

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Bharata B Rao <bharata@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Claudio Carvalho <cclaudio@linux.ibm.com>
Cc: kvm-ppc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_uvmem.h |   6 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c      |   2 +-
 arch/powerpc/kvm/book3s_hv.c                |   2 +-
 arch/powerpc/kvm/book3s_hv_uvmem.c          | 195 +++++++++++++++++++++++++---
 4 files changed, 180 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
index 5a9834e..f0c5708 100644
--- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -21,7 +21,8 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
 int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
 unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
 void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
-			     struct kvm *kvm, bool skip_page_out);
+			     struct kvm *kvm, bool skip_page_out,
+			     bool purge_gfn);
 #else
 static inline int kvmppc_uvmem_init(void)
 {
@@ -75,6 +76,7 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
 
 static inline void
 kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
-			struct kvm *kvm, bool skip_page_out) { }
+			struct kvm *kvm, bool skip_page_out,
+			bool purge_gfn) { }
 #endif /* CONFIG_PPC_UV */
 #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 803940d..3448459 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1100,7 +1100,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
 	unsigned int shift;
 
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
-		kvmppc_uvmem_drop_pages(memslot, kvm, true);
+		kvmppc_uvmem_drop_pages(memslot, kvm, true, false);
 
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
 		return;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6717d24..6cf80e5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -5482,7 +5482,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
 			continue;
 
 		kvm_for_each_memslot(memslot, slots) {
-			kvmppc_uvmem_drop_pages(memslot, kvm, true);
+			kvmppc_uvmem_drop_pages(memslot, kvm, true, true);
 			uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
 		}
 	}
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 3599aaa..666d1bb 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -98,7 +98,127 @@
 static unsigned long *kvmppc_uvmem_bitmap;
 static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
 
-#define KVMPPC_UVMEM_PFN	(1UL << 63)
+/*
+ * States of a GFN
+ * ---------------
+ * The GFN can be in one of the following states.
+ *
+ * (a) Secure - The GFN is secure. The GFN is associated with
+ *	a Secure VM, the contents of the GFN is not accessible
+ *	to the Hypervisor.  This GFN can be backed by a secure-PFN,
+ *	or can be backed by a normal-PFN with contents encrypted.
+ *	The former is true when the GFN is paged-in into the
+ *	ultravisor. The latter is true when the GFN is paged-out
+ *	of the ultravisor.
+ *
+ * (b) Shared - The GFN is shared. The GFN is associated with a
+ *	a secure VM. The contents of the GFN is accessible to
+ *	Hypervisor. This GFN is backed by a normal-PFN and its
+ *	content is un-encrypted.
+ *
+ * (c) Normal - The GFN is a normal. The GFN is associated with
+ *	a normal VM. The contents of the GFN is accesible to
+ *	the Hypervisor. Its content is never encrypted.
+ *
+ * States of a VM.
+ * ---------------
+ *
+ * Normal VM:  A VM whose contents are always accessible to
+ *	the hypervisor.  All its GFNs are normal-GFNs.
+ *
+ * Secure VM: A VM whose contents are not accessible to the
+ *	hypervisor without the VM's consent.  Its GFNs are
+ *	either Shared-GFN or Secure-GFNs.
+ *
+ * Transient VM: A Normal VM that is transitioning to secure VM.
+ *	The transition starts on successful return of
+ *	H_SVM_INIT_START, and ends on successful return
+ *	of H_SVM_INIT_DONE. This transient VM, can have GFNs
+ *	in any of the three states; i.e Secure-GFN, Shared-GFN,
+ *	and Normal-GFN.	The VM never executes in this state
+ *	in supervisor-mode.
+ *
+ * Memory slot State.
+ * -----------------------------
+ *	The state of a memory slot mirrors the state of the
+ *	VM the memory slot is associated with.
+ *
+ * VM State transition.
+ * --------------------
+ *
+ *  A VM always starts in Normal Mode.
+ *
+ *  H_SVM_INIT_START moves the VM into transient state. During this
+ *  time the Ultravisor may request some of its GFNs to be shared or
+ *  secured. So its GFNs can be in one of the three GFN states.
+ *
+ *  H_SVM_INIT_DONE moves the VM entirely from transient state to
+ *  secure-state. At this point any left-over normal-GFNs are
+ *  transitioned to Secure-GFN.
+ *
+ *  H_SVM_INIT_ABORT moves the transient VM back to normal VM.
+ *  All its GFNs are moved to Normal-GFNs.
+ *
+ *  UV_TERMINATE transitions the secure-VM back to normal-VM. All
+ *  the secure-GFN and shared-GFNs are tranistioned to normal-GFN
+ *  Note: The contents of the normal-GFN is undefined at this point.
+ *
+ * GFN state implementation:
+ * -------------------------
+ *
+ * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
+ * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
+ * set, and contains the value of the secure-PFN.
+ * It is associated with a normal-PFN; also called mem_pfn, when
+ * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
+ * The value of the normal-PFN is not tracked.
+ *
+ * Shared GFN is associated with a normal-PFN. Its pfn[] has
+ * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
+ * is not tracked.
+ *
+ * Normal GFN is associated with normal-PFN. Its pfn[] has
+ * no flag set. The value of the normal-PFN is not tracked.
+ *
+ * Life cycle of a GFN
+ * --------------------
+ *
+ * --------------------------------------------------------------
+ * |        |     Share  |  Unshare | SVM       |H_SVM_INIT_DONE|
+ * |        |operation   |operation | abort/    |               |
+ * |        |            |          | terminate |               |
+ * -------------------------------------------------------------
+ * |        |            |          |           |               |
+ * | Secure |     Shared | Secure   |Normal     |Secure         |
+ * |        |            |          |           |               |
+ * | Shared |     Shared | Secure   |Normal     |Shared         |
+ * |        |            |          |           |               |
+ * | Normal |     Shared | Secure   |Normal     |Secure         |
+ * --------------------------------------------------------------
+ *
+ * Life cycle of a VM
+ * --------------------
+ *
+ * --------------------------------------------------------------------
+ * |         |  start    |  H_SVM_  |H_SVM_   |H_SVM_     |UV_SVM_    |
+ * |         |  VM       |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE  |
+ * |         |           |          |         |           |           |
+ * --------- ----------------------------------------------------------
+ * |         |           |          |         |           |           |
+ * | Normal  | Normal    | Transient|Error    |Error      |Normal     |
+ * |         |           |          |         |           |           |
+ * | Secure  |   Error   | Error    |Error    |Error      |Normal     |
+ * |         |           |          |         |           |           |
+ * |Transient|   N/A     | Error    |Secure   |Normal     |Normal     |
+ * --------------------------------------------------------------------
+ */
+
+#define KVMPPC_GFN_UVMEM_PFN	(1UL << 63)
+#define KVMPPC_GFN_MEM_PFN	(1UL << 62)
+#define KVMPPC_GFN_SHARED	(1UL << 61)
+#define KVMPPC_GFN_SECURE	(KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)
+#define KVMPPC_GFN_FLAG_MASK	(KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)
+#define KVMPPC_GFN_PFN_MASK	(~KVMPPC_GFN_FLAG_MASK)
 
 struct kvmppc_uvmem_slot {
 	struct list_head list;
@@ -106,11 +226,11 @@ struct kvmppc_uvmem_slot {
 	unsigned long base_pfn;
 	unsigned long *pfns;
 };
-
 struct kvmppc_uvmem_page_pvt {
 	struct kvm *kvm;
 	unsigned long gpa;
 	bool skip_page_out;
+	bool purge_gfn;
 };
 
 int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
@@ -154,8 +274,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
 	mutex_unlock(&kvm->arch.uvmem_lock);
 }
 
-static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
-				    struct kvm *kvm)
+static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
+			unsigned long flag, unsigned long uvmem_pfn)
 {
 	struct kvmppc_uvmem_slot *p;
 
@@ -163,24 +283,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
 		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
 			unsigned long index = gfn - p->base_pfn;
 
-			p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
+			if (flag == KVMPPC_GFN_UVMEM_PFN)
+				p->pfns[index] = uvmem_pfn | flag;
+			else
+				p->pfns[index] = flag;
 			return;
 		}
 	}
 }
 
-static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
+/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */
+static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
+			unsigned long uvmem_pfn, struct kvm *kvm)
 {
-	struct kvmppc_uvmem_slot *p;
+	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
+}
 
-	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
-		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
-			p->pfns[gfn - p->base_pfn] = 0;
-			return;
-		}
-	}
+/* mark the GFN as secure-GFN associated with a memory-PFN. */
+static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
 }
 
+/* mark the GFN as a shared GFN. */
+static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
+}
+
+/* mark the GFN as a non-existent GFN. */
+static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, 0, 0);
+}
+
+/* return true, if the GFN is a secure-GFN backed by a secure-PFN */
 static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
 				    unsigned long *uvmem_pfn)
 {
@@ -190,10 +327,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
 		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
 			unsigned long index = gfn - p->base_pfn;
 
-			if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
+			if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) {
 				if (uvmem_pfn)
 					*uvmem_pfn = p->pfns[index] &
-						     ~KVMPPC_UVMEM_PFN;
+						     KVMPPC_GFN_PFN_MASK;
 				return true;
 			} else
 				return false;
@@ -257,9 +394,13 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
  * is HV side fault on these pages. Next we *get* these pages, forcing
  * fault on them, do fault time migration to replace the device PTEs in
  * QEMU page table with normal PTEs from newly allocated pages.
+ *
+ * if @purge_gfn is set, invalidate the GFN. GFN is not shared nor secure
+ * anymore.
  */
 void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
-			     struct kvm *kvm, bool skip_page_out)
+			     struct kvm *kvm, bool skip_page_out,
+			     bool purge_gfn)
 {
 	int i;
 	struct kvmppc_uvmem_page_pvt *pvt;
@@ -270,14 +411,17 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
 		struct page *uvmem_page;
 
 		mutex_lock(&kvm->arch.uvmem_lock);
+
 		if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+			if (purge_gfn)
+				kvmppc_gfn_remove(gfn, kvm);
 			mutex_unlock(&kvm->arch.uvmem_lock);
 			continue;
 		}
-
 		uvmem_page = pfn_to_page(uvmem_pfn);
 		pvt = uvmem_page->zone_device_data;
 		pvt->skip_page_out = skip_page_out;
+		pvt->purge_gfn = purge_gfn;
 		mutex_unlock(&kvm->arch.uvmem_lock);
 
 		pfn = gfn_to_pfn(kvm, gfn);
@@ -305,7 +449,7 @@ unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 
 	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
-		kvmppc_uvmem_drop_pages(memslot, kvm, false);
+		kvmppc_uvmem_drop_pages(memslot, kvm, false, true);
 
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
@@ -347,7 +491,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
 		goto out_clear;
 
 	uvmem_pfn = bit + pfn_first;
-	kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+	kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
 
 	pvt->gpa = gpa;
 	pvt->kvm = kvm;
@@ -454,6 +598,7 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
 		uvmem_page = pfn_to_page(uvmem_pfn);
 		pvt = uvmem_page->zone_device_data;
 		pvt->skip_page_out = true;
+		pvt->purge_gfn = false;
 	}
 
 retry:
@@ -467,12 +612,16 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
 		uvmem_page = pfn_to_page(uvmem_pfn);
 		pvt = uvmem_page->zone_device_data;
 		pvt->skip_page_out = true;
+		pvt->purge_gfn = false;
 		kvm_release_pfn_clean(pfn);
 		goto retry;
 	}
 
-	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
+	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+				page_shift)) {
+		kvmppc_gfn_shared(gfn, kvm);
 		ret = H_SUCCESS;
+	}
 	kvm_release_pfn_clean(pfn);
 	mutex_unlock(&kvm->arch.uvmem_lock);
 out:
@@ -530,6 +679,7 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
 	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
 				&downgrade))
 		ret = H_SUCCESS;
+
 out_unlock:
 	mutex_unlock(&kvm->arch.uvmem_lock);
 out:
@@ -655,7 +805,10 @@ static void kvmppc_uvmem_page_free(struct page *page)
 
 	pvt = page->zone_device_data;
 	page->zone_device_data = NULL;
-	kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+	if (pvt->purge_gfn)
+		kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+	else
+		kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
 	kfree(pvt);
 }
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v2 1/4] KVM: PPC: Book3S HV: Fix function definition in book3s_hv_uvmem.c
From: Ram Pai @ 2020-06-18  9:19 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: ldufour, linuxram, cclaudio, bharata, sathnaga, aneesh.kumar,
	sukadev, bauerman, david
In-Reply-To: <1592471945-24786-1-git-send-email-linuxram@us.ibm.com>

Without this fix, git is confused. It generates wrong
function context for code changes in subsequent patches.
Weird, but true.

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Bharata B Rao <bharata@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Claudio Carvalho <cclaudio@linux.ibm.com>
Cc: kvm-ppc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
 arch/powerpc/kvm/book3s_hv_uvmem.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index ad950f89..3599aaa 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -369,8 +369,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  * Alloc a PFN from private device memory pool and copy page from normal
  * memory to secure memory using UV_PAGE_IN uvcall.
  */
-static int
-kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
+static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
 		   unsigned long end, unsigned long gpa, struct kvm *kvm,
 		   unsigned long page_shift, bool *downgrade)
 {
@@ -437,8 +436,8 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  * In the former case, uses dev_pagemap_ops.migrate_to_ram handler
  * to unmap the device page from QEMU's page tables.
  */
-static unsigned long
-kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
+static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
+		unsigned long page_shift)
 {
 
 	int ret = H_PARAMETER;
@@ -487,9 +486,9 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  * H_PAGE_IN_SHARED flag makes the page shared which means that the same
  * memory in is visible from both UV and HV.
  */
-unsigned long
-kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
-		     unsigned long flags, unsigned long page_shift)
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+		unsigned long flags,
+		unsigned long page_shift)
 {
 	bool downgrade = false;
 	unsigned long start, end;
@@ -546,10 +545,10 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  * Provision a new page on HV side and copy over the contents
  * from secure memory using UV_PAGE_OUT uvcall.
  */
-static int
-kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
-		    unsigned long end, unsigned long page_shift,
-		    struct kvm *kvm, unsigned long gpa)
+static int kvmppc_svm_page_out(struct vm_area_struct *vma,
+		unsigned long start,
+		unsigned long end, unsigned long page_shift,
+		struct kvm *kvm, unsigned long gpa)
 {
 	unsigned long src_pfn, dst_pfn = 0;
 	struct migrate_vma mig;
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v2 0/4] Migrate non-migrated pages of a SVM.
From: Ram Pai @ 2020-06-18  9:19 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: ldufour, linuxram, cclaudio, bharata, sathnaga, aneesh.kumar,
	sukadev, bauerman, david

This patch series migrates the non-migrated pages of a SVM.
This is required when the UV calls H_SVM_INIT_DONE, and
when a memory-slot is hotplugged to a Secure VM.

Testing: Passed rigorous SVM reboot test using different
	sized SVMs.

Changelog:
	. fixed a bug observed by Bharata. Pages that
	were paged-in and later paged-out must also be
	skipped from migration during H_SVM_INIT_DONE.

Laurent Dufour (1):
  KVM: PPC: Book3S HV: migrate hot plugged memory

Ram Pai (3):
  KVM: PPC: Book3S HV: Fix function definition in book3s_hv_uvmem.c
  KVM: PPC: Book3S HV: track the state GFNs associated with secure VMs
  KVM: PPC: Book3S HV: migrate remaining normal-GFNs to secure-GFNs in
    H_SVM_INIT_DONE

 Documentation/powerpc/ultravisor.rst        |   2 +
 arch/powerpc/include/asm/kvm_book3s_uvmem.h |   8 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c      |   2 +-
 arch/powerpc/kvm/book3s_hv.c                |  12 +-
 arch/powerpc/kvm/book3s_hv_uvmem.c          | 449 ++++++++++++++++++++++------
 5 files changed, 368 insertions(+), 105 deletions(-)

-- 
1.8.3.1


^ permalink raw reply

* Re: [PATCH] mm: Move p?d_alloc_track to separate header file
From: Mike Rapoport @ 2020-06-18  9:17 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-arch, Stephen Rothwell, jroedel, linux-mm, peterz,
	Linus Torvalds, Joerg Roedel, linuxppc-dev, Steven Rostedt,
	linux-kernel, Abdul Haleem, linux-next, Satheesh Rajendran,
	Andy Lutomirski, manvanth, hch
In-Reply-To: <20200617181226.ab213ea1531b5dd6eca1b0b6@linux-foundation.org>

On Wed, Jun 17, 2020 at 06:12:26PM -0700, Andrew Morton wrote:
> On Tue,  9 Jun 2020 14:05:33 +0200 Joerg Roedel <joro@8bytes.org> wrote:
> 
> > From: Joerg Roedel <jroedel@suse.de>
> > 
> > The functions are only used in two source files, so there is no need
> > for them to be in the global <linux/mm.h> header. Move them to the new
> > <linux/pgalloc-track.h> header and include it only where needed.
> > 
> > ...
> >
> > new file mode 100644
> > index 000000000000..1dcc865029a2
> > --- /dev/null
> > +++ b/include/linux/pgalloc-track.h
> > @@ -0,0 +1,51 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef _LINUX_PGALLLC_TRACK_H
> > +#define _LINUX_PGALLLC_TRACK_H
> 
> hm, no #includes.  I guess this is OK, given the limited use.
> 
> But it does make one wonder whether ioremap.c should be moved from lib/
> to mm/ and this file should be moved from include/linux/ to mm/.

It makes sense, but I am anyway planning consolidation of pgalloc.h, so
most probably pgalloc-track will not survive until 5.9-rc1 :)

If you think that it is worth moving ioremap.c to mm/ regardless of churn,
I can send a patch for that.

> Oh well.

-- 
Sincerely yours,
Mike.

^ permalink raw reply

* Re: [PATCH V3 (RESEND) 0/3] arm64: Enable vmemmap mapping from device memory
From: Mike Rapoport @ 2020-06-18  8:56 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Mark Rutland, Michal Hocko, linux-ia64, David Hildenbrand,
	Peter Zijlstra, Dave Hansen, linux-mm, Paul Mackerras,
	linux-riscv, Will Deacon, Thomas Gleixner, x86,
	Matthew Wilcox (Oracle), Ingo Molnar, Catalin Marinas, Fenghua Yu,
	Pavel Tatashin, Andy Lutomirski, Paul Walmsley, Dan Williams,
	linux-arm-kernel, Tony Luck, linux-kernel, Palmer Dabbelt,
	Andrew Morton, linuxppc-dev, Kirill A. Shutemov
In-Reply-To: <1592442930-9380-1-git-send-email-anshuman.khandual@arm.com>

On Thu, Jun 18, 2020 at 06:45:27AM +0530, Anshuman Khandual wrote:
> This series enables vmemmap backing memory allocation from device memory
> ranges on arm64. But before that, it enables vmemmap_populate_basepages()
> and vmemmap_alloc_block_buf() to accommodate struct vmem_altmap based
> allocation requests.
> 
> This series applies on 5.8-rc1.
> 
> Pending Question:
> 
> altmap_alloc_block_buf() does not have any other remaining users in
> the tree after this change. Should it be converted into a static
> function and its declaration be dropped from the header
> (include/linux/mm.h)? Avoided doing so because I was not sure if there
> are any off-tree users or not.

Well, off-tree users probably have an active fork anyway so they could
switch to vmemmap_alloc_block_buf()...

Regardless, can you please update Documentation/vm/memory-model.rst to
keep it in sync with the code?

> Changes in V3:
> 
> - Dropped comment from free_hotplug_page_range() per Robin
> - Modified comment in unmap_hotplug_range() per Robin
> - Enabled altmap support in vmemmap_alloc_block_buf() per Robin
> 
> Changes in V2: (https://lkml.org/lkml/2020/3/4/475)
> 
> - Rebased on latest hot-remove series (v14) adding P4D page table support
> 
> Changes in V1: (https://lkml.org/lkml/2020/1/23/12)
> 
> - Added a WARN_ON() in unmap_hotplug_range() when altmap is
>   provided without the page table backing memory being freed
> 
> Changes in RFC V2: (https://lkml.org/lkml/2019/10/21/11)
> 
> - Changed the commit message on 1/2 patch per Will
> - Changed the commit message on 2/2 patch as well
> - Rebased on arm64 memory hot remove series (v10)
> 
> RFC V1: (https://lkml.org/lkml/2019/6/28/32)
> 
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Paul Walmsley <paul.walmsley@sifive.com>
> Cc: Palmer Dabbelt <palmer@dabbelt.com>
> Cc: Tony Luck <tony.luck@intel.com>
> Cc: Fenghua Yu <fenghua.yu@intel.com>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Mike Rapoport <rppt@linux.ibm.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-ia64@vger.kernel.org
> Cc: linux-riscv@lists.infradead.org
> Cc: x86@kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-mm@kvack.org
> Cc: linux-kernel@vger.kernel.org
> 
> Anshuman Khandual (3):
>   mm/sparsemem: Enable vmem_altmap support in vmemmap_populate_basepages()
>   mm/sparsemem: Enable vmem_altmap support in vmemmap_alloc_block_buf()
>   arm64/mm: Enable vmem_altmap support for vmemmap mappings
> 
>  arch/arm64/mm/mmu.c       | 59 ++++++++++++++++++++++++++-------------
>  arch/ia64/mm/discontig.c  |  2 +-
>  arch/powerpc/mm/init_64.c | 10 +++----
>  arch/riscv/mm/init.c      |  2 +-
>  arch/x86/mm/init_64.c     | 12 ++++----
>  include/linux/mm.h        |  8 ++++--
>  mm/sparse-vmemmap.c       | 38 ++++++++++++++++++++-----
>  7 files changed, 87 insertions(+), 44 deletions(-)
> 
> -- 
> 2.20.1
> 

-- 
Sincerely yours,
Mike.

^ permalink raw reply

* Re: [PATCH v2 02/12] ocxl: Change type of pasid to unsigned int
From: Frederic Barrat @ 2020-06-18  8:05 UTC (permalink / raw)
  To: Fenghua Yu, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	H Peter Anvin, David Woodhouse, Lu Baolu, Andrew Donnellan,
	Felix Kuehling, Joerg Roedel, Dave Hansen, Tony Luck, Ashok Raj,
	Jacob Jun Pan, Dave Jiang, Yu-cheng Yu, Sohil Mehta,
	Ravi V Shankar
  Cc: iommu, x86, linuxppc-dev, linux-kernel, amd-gfx
In-Reply-To: <1592008893-9388-3-git-send-email-fenghua.yu@intel.com>



Le 13/06/2020 à 02:41, Fenghua Yu a écrit :
> PASID is defined as "int" although it's a 20-bit value and shouldn't be
> negative int. To be consistent with type defined in iommu, define PASID
> as "unsigned int".


It looks like this patch was considered because of the use of 'pasid' in 
variable or function names. The ocxl driver only makes sense on powerpc 
and shouldn't compile on anything else, so it's probably useless in the 
context of that series.
The pasid here is defined by the opencapi specification 
(https://opencapi.org), it is borrowed from the PCI world and you could 
argue it could be an unsigned int. But then I think the patch doesn't go 
far enough. But considering it's not used on x86, I think this patch can 
be dropped.

   Fred



> Suggested-by: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
> Reviewed-by: Tony Luck <tony.luck@intel.com>
> ---
> v2:
> - Create this new patch to define PASID as "unsigned int" consistently in
>    ocxl (Thomas)
> 
>   drivers/misc/ocxl/config.c        |  3 ++-
>   drivers/misc/ocxl/link.c          |  6 +++---
>   drivers/misc/ocxl/ocxl_internal.h |  6 +++---
>   drivers/misc/ocxl/pasid.c         |  2 +-
>   drivers/misc/ocxl/trace.h         | 20 ++++++++++----------
>   include/misc/ocxl.h               |  6 +++---
>   6 files changed, 22 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
> index c8e19bfb5ef9..22d034caed3d 100644
> --- a/drivers/misc/ocxl/config.c
> +++ b/drivers/misc/ocxl/config.c
> @@ -806,7 +806,8 @@ int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
>   }
>   EXPORT_SYMBOL_GPL(ocxl_config_set_TL);
>   
> -int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
> +int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control,
> +				unsigned int pasid)
>   {
>   	u32 val;
>   	unsigned long timeout;
> diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
> index 58d111afd9f6..931f6ae022db 100644
> --- a/drivers/misc/ocxl/link.c
> +++ b/drivers/misc/ocxl/link.c
> @@ -492,7 +492,7 @@ static u64 calculate_cfg_state(bool kernel)
>   	return state;
>   }
>   
> -int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
> +int ocxl_link_add_pe(void *link_handle, unsigned int pasid, u32 pidr, u32 tidr,
>   		u64 amr, struct mm_struct *mm,
>   		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
>   		void *xsl_err_data)
> @@ -572,7 +572,7 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
>   }
>   EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
>   
> -int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
> +int ocxl_link_update_pe(void *link_handle, unsigned int pasid, __u16 tid)
>   {
>   	struct ocxl_link *link = (struct ocxl_link *) link_handle;
>   	struct spa *spa = link->spa;
> @@ -608,7 +608,7 @@ int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
>   	return rc;
>   }
>   
> -int ocxl_link_remove_pe(void *link_handle, int pasid)
> +int ocxl_link_remove_pe(void *link_handle, unsigned int pasid)
>   {
>   	struct ocxl_link *link = (struct ocxl_link *) link_handle;
>   	struct spa *spa = link->spa;
> diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
> index 345bf843a38e..3ca982ba7472 100644
> --- a/drivers/misc/ocxl/ocxl_internal.h
> +++ b/drivers/misc/ocxl/ocxl_internal.h
> @@ -41,7 +41,7 @@ struct ocxl_afu {
>   	struct ocxl_afu_config config;
>   	int pasid_base;
>   	int pasid_count; /* opened contexts */
> -	int pasid_max; /* maximum number of contexts */
> +	unsigned int pasid_max; /* maximum number of contexts */
>   	int actag_base;
>   	int actag_enabled;
>   	struct mutex contexts_lock;
> @@ -69,7 +69,7 @@ struct ocxl_xsl_error {
>   
>   struct ocxl_context {
>   	struct ocxl_afu *afu;
> -	int pasid;
> +	unsigned int pasid;
>   	struct mutex status_mutex;
>   	enum ocxl_context_status status;
>   	struct address_space *mapping;
> @@ -128,7 +128,7 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
>    * pasid: the PASID for the AFU context
>    * tid: the new thread id for the process element
>    */
> -int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
> +int ocxl_link_update_pe(void *link_handle, unsigned int pasid, __u16 tid);
>   
>   int ocxl_context_mmap(struct ocxl_context *ctx,
>   			struct vm_area_struct *vma);
> diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c
> index d14cb56e6920..a151fc8f0bec 100644
> --- a/drivers/misc/ocxl/pasid.c
> +++ b/drivers/misc/ocxl/pasid.c
> @@ -80,7 +80,7 @@ static void range_free(struct list_head *head, u32 start, u32 size,
>   
>   int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size)
>   {
> -	int max_pasid;
> +	unsigned int max_pasid;
>   
>   	if (fn->config.max_pasid_log < 0)
>   		return -ENOSPC;
> diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
> index 17e21cb2addd..019e2fc63b1d 100644
> --- a/drivers/misc/ocxl/trace.h
> +++ b/drivers/misc/ocxl/trace.h
> @@ -9,13 +9,13 @@
>   #include <linux/tracepoint.h>
>   
>   DECLARE_EVENT_CLASS(ocxl_context,
> -	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
> +	TP_PROTO(pid_t pid, void *spa, unsigned int pasid, u32 pidr, u32 tidr),
>   	TP_ARGS(pid, spa, pasid, pidr, tidr),
>   
>   	TP_STRUCT__entry(
>   		__field(pid_t, pid)
>   		__field(void*, spa)
> -		__field(int, pasid)
> +		__field(unsigned int, pasid)
>   		__field(u32, pidr)
>   		__field(u32, tidr)
>   	),
> @@ -38,21 +38,21 @@ DECLARE_EVENT_CLASS(ocxl_context,
>   );
>   
>   DEFINE_EVENT(ocxl_context, ocxl_context_add,
> -	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
> +	TP_PROTO(pid_t pid, void *spa, unsigned int pasid, u32 pidr, u32 tidr),
>   	TP_ARGS(pid, spa, pasid, pidr, tidr)
>   );
>   
>   DEFINE_EVENT(ocxl_context, ocxl_context_remove,
> -	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
> +	TP_PROTO(pid_t pid, void *spa, unsigned int pasid, u32 pidr, u32 tidr),
>   	TP_ARGS(pid, spa, pasid, pidr, tidr)
>   );
>   
>   TRACE_EVENT(ocxl_terminate_pasid,
> -	TP_PROTO(int pasid, int rc),
> +	TP_PROTO(unsigned int pasid, int rc),
>   	TP_ARGS(pasid, rc),
>   
>   	TP_STRUCT__entry(
> -		__field(int, pasid)
> +		__field(unsigned int, pasid)
>   		__field(int, rc)
>   	),
>   
> @@ -107,11 +107,11 @@ DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack,
>   );
>   
>   TRACE_EVENT(ocxl_afu_irq_alloc,
> -	TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq),
> +	TP_PROTO(unsigned int pasid, int irq_id, unsigned int virq, int hw_irq),
>   	TP_ARGS(pasid, irq_id, virq, hw_irq),
>   
>   	TP_STRUCT__entry(
> -		__field(int, pasid)
> +		__field(unsigned int, pasid)
>   		__field(int, irq_id)
>   		__field(unsigned int, virq)
>   		__field(int, hw_irq)
> @@ -133,11 +133,11 @@ TRACE_EVENT(ocxl_afu_irq_alloc,
>   );
>   
>   TRACE_EVENT(ocxl_afu_irq_free,
> -	TP_PROTO(int pasid, int irq_id),
> +	TP_PROTO(unsigned int pasid, int irq_id),
>   	TP_ARGS(pasid, irq_id),
>   
>   	TP_STRUCT__entry(
> -		__field(int, pasid)
> +		__field(unsigned int, pasid)
>   		__field(int, irq_id)
>   	),
>   
> diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
> index 06dd5839e438..5eca04c8da97 100644
> --- a/include/misc/ocxl.h
> +++ b/include/misc/ocxl.h
> @@ -429,7 +429,7 @@ int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec);
>    * desired AFU. It can be found in the AFU configuration
>    */
>   int ocxl_config_terminate_pasid(struct pci_dev *dev,
> -				int afu_control_offset, int pasid);
> +				int afu_control_offset, unsigned int pasid);
>   
>   /*
>    * Read the configuration space of a function and fill in a
> @@ -466,7 +466,7 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle);
>    * 'xsl_err_data' is an argument passed to the above callback, if
>    * defined
>    */
> -int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
> +int ocxl_link_add_pe(void *link_handle, unsigned int pasid, u32 pidr, u32 tidr,
>   		u64 amr, struct mm_struct *mm,
>   		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
>   		void *xsl_err_data);
> @@ -474,7 +474,7 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
>   /*
>    * Remove a Process Element from the Shared Process Area for a link
>    */
> -int ocxl_link_remove_pe(void *link_handle, int pasid);
> +int ocxl_link_remove_pe(void *link_handle, unsigned int pasid);
>   
>   /*
>    * Allocate an AFU interrupt associated to the link.
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox