LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2 3/4] KVM: PPC: Book3S HV: migrate remaining normal-GFNs to secure-GFNs in H_SVM_INIT_DONE
From: Ram Pai @ 2020-06-18  9:19 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: ldufour, linuxram, cclaudio, bharata, sathnaga, aneesh.kumar,
	sukadev, bauerman, david
In-Reply-To: <1592471945-24786-1-git-send-email-linuxram@us.ibm.com>

H_SVM_INIT_DONE incorrectly assumes that the Ultravisor has explicitly
called H_SVM_PAGE_IN for all secure pages. These GFNs continue to be
normal GFNs associated with normal PFNs; when infact, these GFNs should
have been secure GFNs, associated with device PFNs.

Move all the PFN associated with the SVM's GFNs, to secure-PFNs, in
H_SVM_INIT_DONE. Skip the GFNs that are already Paged-in or Shared
through H_SVM_PAGE_IN, or Paged-in followed by a Paged-out through
UV_PAGE_OUT.

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Bharata B Rao <bharata@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Claudio Carvalho <cclaudio@linux.ibm.com>
Cc: kvm-ppc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
 Documentation/powerpc/ultravisor.rst |   2 +
 arch/powerpc/kvm/book3s_hv_uvmem.c   | 235 +++++++++++++++++++++++++----------
 2 files changed, 171 insertions(+), 66 deletions(-)

diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst
index 363736d..3bc8957 100644
--- a/Documentation/powerpc/ultravisor.rst
+++ b/Documentation/powerpc/ultravisor.rst
@@ -933,6 +933,8 @@ Return values
 	* H_UNSUPPORTED		if called from the wrong context (e.g.
 				from an SVM or before an H_SVM_INIT_START
 				hypercall).
+	* H_STATE		if the hypervisor could not successfully
+                                transition the VM to Secure VM.
 
 Description
 ~~~~~~~~~~~
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 666d1bb..78f8580 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -339,6 +339,21 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
 	return false;
 }
 
+/* return true, if the GFN is a shared-GFN, or a secure-GFN */
+bool kvmppc_gfn_has_transitioned(unsigned long gfn, struct kvm *kvm)
+{
+	struct kvmppc_uvmem_slot *p;
+
+	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+			unsigned long index = gfn - p->base_pfn;
+
+			return (p->pfns[index] & KVMPPC_GFN_FLAG_MASK);
+		}
+	}
+	return false;
+}
+
 unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
@@ -377,14 +392,152 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 	return ret;
 }
 
+static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm);
+
+/*
+ * Alloc a PFN from private device memory pool. If @pagein is true,
+ * copy page from normal memory to secure memory using UV_PAGE_IN uvcall.
+ */
+static int kvmppc_svm_migrate_page(struct vm_area_struct *vma,
+		unsigned long start,
+		unsigned long end, unsigned long gpa, struct kvm *kvm,
+		unsigned long page_shift,
+		bool pagein)
+{
+	unsigned long src_pfn, dst_pfn = 0;
+	struct migrate_vma mig;
+	struct page *dpage;
+	struct page *spage;
+	unsigned long pfn;
+	int ret = 0;
+
+	memset(&mig, 0, sizeof(mig));
+	mig.vma = vma;
+	mig.start = start;
+	mig.end = end;
+	mig.src = &src_pfn;
+	mig.dst = &dst_pfn;
+
+	ret = migrate_vma_setup(&mig);
+	if (ret)
+		return ret;
+
+	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
+		ret = -1;
+		goto out_finalize;
+	}
+
+	dpage = kvmppc_uvmem_get_page(gpa, kvm);
+	if (!dpage) {
+		ret = -1;
+		goto out_finalize;
+	}
+
+	if (pagein) {
+		pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+		spage = migrate_pfn_to_page(*mig.src);
+		if (spage) {
+			ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
+					gpa, 0, page_shift);
+			if (ret)
+				goto out_finalize;
+		}
+	}
+
+	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+	migrate_vma_pages(&mig);
+out_finalize:
+	migrate_vma_finalize(&mig);
+	return ret;
+}
+
+static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+		const struct kvm_memory_slot *memslot)
+{
+	unsigned long gfn = memslot->base_gfn;
+	unsigned long end;
+	bool downgrade = false;
+	struct vm_area_struct *vma;
+	int i, ret = 0;
+	unsigned long start = gfn_to_hva(kvm, gfn);
+
+	if (kvm_is_error_hva(start))
+		return H_STATE;
+
+	end = start + (memslot->npages << PAGE_SHIFT);
+
+	down_write(&kvm->mm->mmap_sem);
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	vma = find_vma_intersection(kvm->mm, start, end);
+	if (!vma || vma->vm_start > start || vma->vm_end < end) {
+		ret = H_STATE;
+		goto out_unlock;
+	}
+
+	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+			  MADV_UNMERGEABLE, &vma->vm_flags);
+	downgrade_write(&kvm->mm->mmap_sem);
+	downgrade = true;
+	if (ret) {
+		ret = H_STATE;
+		goto out_unlock;
+	}
+
+	for (i = 0; i < memslot->npages; i++, ++gfn) {
+		/*
+		 * skip GFNs that have already tranistioned.
+		 * paged-in GFNs, shared GFNs, paged-in GFNs
+		 * that were later paged-out.
+		 */
+		if (kvmppc_gfn_has_transitioned(gfn, kvm))
+			continue;
+
+		start = gfn_to_hva(kvm, gfn);
+		end = start + (1UL << PAGE_SHIFT);
+		ret = kvmppc_svm_migrate_page(vma, start, end,
+			(gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
+
+		if (ret)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&kvm->arch.uvmem_lock);
+	if (downgrade)
+		up_read(&kvm->mm->mmap_sem);
+	else
+		up_write(&kvm->mm->mmap_sem);
+	return ret;
+}
+
 unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
 {
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int srcu_idx;
+	long ret = H_SUCCESS;
+
 	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
 		return H_UNSUPPORTED;
 
+	/* migrate any unmoved normal pfn to device pfns*/
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots) {
+		ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
+		if (ret) {
+			ret = H_STATE;
+			goto out;
+		}
+	}
+
 	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
 	pr_info("LPID %d went secure\n", kvm->arch.lpid);
-	return H_SUCCESS;
+
+out:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
 }
 
 /*
@@ -510,68 +663,6 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
 }
 
 /*
- * Alloc a PFN from private device memory pool and copy page from normal
- * memory to secure memory using UV_PAGE_IN uvcall.
- */
-static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
-		   unsigned long end, unsigned long gpa, struct kvm *kvm,
-		   unsigned long page_shift, bool *downgrade)
-{
-	unsigned long src_pfn, dst_pfn = 0;
-	struct migrate_vma mig;
-	struct page *spage;
-	unsigned long pfn;
-	struct page *dpage;
-	int ret = 0;
-
-	memset(&mig, 0, sizeof(mig));
-	mig.vma = vma;
-	mig.start = start;
-	mig.end = end;
-	mig.src = &src_pfn;
-	mig.dst = &dst_pfn;
-
-	/*
-	 * We come here with mmap_sem write lock held just for
-	 * ksm_madvise(), otherwise we only need read mmap_sem.
-	 * Hence downgrade to read lock once ksm_madvise() is done.
-	 */
-	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
-			  MADV_UNMERGEABLE, &vma->vm_flags);
-	downgrade_write(&kvm->mm->mmap_sem);
-	*downgrade = true;
-	if (ret)
-		return ret;
-
-	ret = migrate_vma_setup(&mig);
-	if (ret)
-		return ret;
-
-	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
-		ret = -1;
-		goto out_finalize;
-	}
-
-	dpage = kvmppc_uvmem_get_page(gpa, kvm);
-	if (!dpage) {
-		ret = -1;
-		goto out_finalize;
-	}
-
-	pfn = *mig.src >> MIGRATE_PFN_SHIFT;
-	spage = migrate_pfn_to_page(*mig.src);
-	if (spage)
-		uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
-			   page_shift);
-
-	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
-	migrate_vma_pages(&mig);
-out_finalize:
-	migrate_vma_finalize(&mig);
-	return ret;
-}
-
-/*
  * Shares the page with HV, thus making it a normal page.
  *
  * - If the page is already secure, then provision a new page and share
@@ -676,9 +767,21 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
 	if (!vma || vma->vm_start > start || vma->vm_end < end)
 		goto out_unlock;
 
-	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
-				&downgrade))
-		ret = H_SUCCESS;
+	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+			  MADV_UNMERGEABLE, &vma->vm_flags);
+	downgrade_write(&kvm->mm->mmap_sem);
+	downgrade = true;
+	if (ret) {
+		ret = H_PARAMETER;
+		goto out_unlock;
+	}
+
+	ret = H_PARAMETER;
+	if (kvmppc_svm_migrate_page(vma, start, end, gpa, kvm, page_shift,
+				true))
+		goto out_unlock;
+
+	ret = H_SUCCESS;
 
 out_unlock:
 	mutex_unlock(&kvm->arch.uvmem_lock);
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v2 2/4] KVM: PPC: Book3S HV: track the state GFNs associated with secure VMs
From: Ram Pai @ 2020-06-18  9:19 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: ldufour, linuxram, cclaudio, bharata, sathnaga, aneesh.kumar,
	sukadev, bauerman, david
In-Reply-To: <1592471945-24786-1-git-send-email-linuxram@us.ibm.com>

During the life of SVM, its GFNs transition through normal, secure and
shared states. Since the kernel does not track GFNs that are shared, it
is not possible to disambiguate a shared GFN from a GFN whose PFN has
not yet been migrated to a secure-PFN. Also it is not possible to
disambiguate a secure-GFN from a GFN whose GFN has been pagedout from
the ultravisor.

The ability to identify the state of a GFN is needed to skip migration of its
PFN to secure-PFN during ESM transition.

The code is re-organized to track the states of a GFN as explained
below.

************************************************************************
 1. States of a GFN
    ---------------
 The GFN can be in one of the following states.

 (a) Secure - The GFN is secure. The GFN is associated with
 	a Secure VM, the contents of the GFN is not accessible
 	to the Hypervisor.  This GFN can be backed by a secure-PFN,
 	or can be backed by a normal-PFN with contents encrypted.
 	The former is true when the GFN is paged-in into the
 	ultravisor. The latter is true when the GFN is paged-out
 	of the ultravisor.

 (b) Shared - The GFN is shared. The GFN is associated with a
 	a secure VM. The contents of the GFN is accessible to
 	Hypervisor. This GFN is backed by a normal-PFN and its
 	content is un-encrypted.

 (c) Normal - The GFN is a normal. The GFN is associated with
 	a normal VM. The contents of the GFN is accesible to
 	the Hypervisor. Its content is never encrypted.

 2. States of a VM.
    ---------------

 (a) Normal VM:  A VM whose contents are always accessible to
 	the hypervisor.  All its GFNs are normal-GFNs.

 (b) Secure VM: A VM whose contents are not accessible to the
 	hypervisor without the VM's consent.  Its GFNs are
 	either Shared-GFN or Secure-GFNs.

 (c) Transient VM: A Normal VM that is transitioning to secure VM.
 	The transition starts on successful return of
 	H_SVM_INIT_START, and ends on successful return
 	of H_SVM_INIT_DONE. This transient VM, can have GFNs
 	in any of the three states; i.e Secure-GFN, Shared-GFN,
 	and Normal-GFN.	The VM never executes in this state
 	in supervisor-mode.

 3. Memory slot State.
    ------------------
  	The state of a memory slot mirrors the state of the
  	VM the memory slot is associated with.

 4. VM State transition.
    --------------------

  A VM always starts in Normal Mode.

  H_SVM_INIT_START moves the VM into transient state. During this
  time the Ultravisor may request some of its GFNs to be shared or
  secured. So its GFNs can be in one of the three GFN states.

  H_SVM_INIT_DONE moves the VM entirely from transient state to
  secure-state. At this point any left-over normal-GFNs are
  transitioned to Secure-GFN.

  H_SVM_INIT_ABORT moves the transient VM back to normal VM.
  All its GFNs are moved to Normal-GFNs.

  UV_TERMINATE transitions the secure-VM back to normal-VM. All
  the secure-GFN and shared-GFNs are tranistioned to normal-GFN
  Note: The contents of the normal-GFN is undefined at this point.

 5. GFN state implementation:
    -------------------------

 Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
 when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
 set, and contains the value of the secure-PFN.
 It is associated with a normal-PFN; also called mem_pfn, when
 the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
 The value of the normal-PFN is not tracked.

 Shared GFN is associated with a normal-PFN. Its pfn[] has
 KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
 is not tracked.

 Normal GFN is associated with normal-PFN. Its pfn[] has
 no flag set. The value of the normal-PFN is not tracked.

 6. Life cycle of a GFN
    --------------------
 --------------------------------------------------------------
 |        |     Share  |  Unshare | SVM       |H_SVM_INIT_DONE|
 |        |operation   |operation | abort/    |               |
 |        |            |          | terminate |               |
 -------------------------------------------------------------
 |        |            |          |           |               |
 | Secure |     Shared | Secure   |Normal     |Secure         |
 |        |            |          |           |               |
 | Shared |     Shared | Secure   |Normal     |Shared         |
 |        |            |          |           |               |
 | Normal |     Shared | Secure   |Normal     |Secure         |
 --------------------------------------------------------------

 7. Life cycle of a VM
    --------------------
 --------------------------------------------------------------------
 |         |  start    |  H_SVM_  |H_SVM_   |H_SVM_     |UV_SVM_    |
 |         |  VM       |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE  |
 |         |           |          |         |           |           |
 --------- ----------------------------------------------------------
 |         |           |          |         |           |           |
 | Normal  | Normal    | Transient|Error    |Error      |Normal     |
 |         |           |          |         |           |           |
 | Secure  |   Error   | Error    |Error    |Error      |Normal     |
 |         |           |          |         |           |           |
 |Transient|   N/A     | Error    |Secure   |Normal     |Normal     |
 --------------------------------------------------------------------

************************************************************************

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Bharata B Rao <bharata@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Claudio Carvalho <cclaudio@linux.ibm.com>
Cc: kvm-ppc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_uvmem.h |   6 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c      |   2 +-
 arch/powerpc/kvm/book3s_hv.c                |   2 +-
 arch/powerpc/kvm/book3s_hv_uvmem.c          | 195 +++++++++++++++++++++++++---
 4 files changed, 180 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
index 5a9834e..f0c5708 100644
--- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -21,7 +21,8 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
 int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
 unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
 void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
-			     struct kvm *kvm, bool skip_page_out);
+			     struct kvm *kvm, bool skip_page_out,
+			     bool purge_gfn);
 #else
 static inline int kvmppc_uvmem_init(void)
 {
@@ -75,6 +76,7 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
 
 static inline void
 kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
-			struct kvm *kvm, bool skip_page_out) { }
+			struct kvm *kvm, bool skip_page_out,
+			bool purge_gfn) { }
 #endif /* CONFIG_PPC_UV */
 #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 803940d..3448459 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1100,7 +1100,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
 	unsigned int shift;
 
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
-		kvmppc_uvmem_drop_pages(memslot, kvm, true);
+		kvmppc_uvmem_drop_pages(memslot, kvm, true, false);
 
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
 		return;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6717d24..6cf80e5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -5482,7 +5482,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
 			continue;
 
 		kvm_for_each_memslot(memslot, slots) {
-			kvmppc_uvmem_drop_pages(memslot, kvm, true);
+			kvmppc_uvmem_drop_pages(memslot, kvm, true, true);
 			uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
 		}
 	}
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 3599aaa..666d1bb 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -98,7 +98,127 @@
 static unsigned long *kvmppc_uvmem_bitmap;
 static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
 
-#define KVMPPC_UVMEM_PFN	(1UL << 63)
+/*
+ * States of a GFN
+ * ---------------
+ * The GFN can be in one of the following states.
+ *
+ * (a) Secure - The GFN is secure. The GFN is associated with
+ *	a Secure VM, the contents of the GFN is not accessible
+ *	to the Hypervisor.  This GFN can be backed by a secure-PFN,
+ *	or can be backed by a normal-PFN with contents encrypted.
+ *	The former is true when the GFN is paged-in into the
+ *	ultravisor. The latter is true when the GFN is paged-out
+ *	of the ultravisor.
+ *
+ * (b) Shared - The GFN is shared. The GFN is associated with a
+ *	a secure VM. The contents of the GFN is accessible to
+ *	Hypervisor. This GFN is backed by a normal-PFN and its
+ *	content is un-encrypted.
+ *
+ * (c) Normal - The GFN is a normal. The GFN is associated with
+ *	a normal VM. The contents of the GFN is accesible to
+ *	the Hypervisor. Its content is never encrypted.
+ *
+ * States of a VM.
+ * ---------------
+ *
+ * Normal VM:  A VM whose contents are always accessible to
+ *	the hypervisor.  All its GFNs are normal-GFNs.
+ *
+ * Secure VM: A VM whose contents are not accessible to the
+ *	hypervisor without the VM's consent.  Its GFNs are
+ *	either Shared-GFN or Secure-GFNs.
+ *
+ * Transient VM: A Normal VM that is transitioning to secure VM.
+ *	The transition starts on successful return of
+ *	H_SVM_INIT_START, and ends on successful return
+ *	of H_SVM_INIT_DONE. This transient VM, can have GFNs
+ *	in any of the three states; i.e Secure-GFN, Shared-GFN,
+ *	and Normal-GFN.	The VM never executes in this state
+ *	in supervisor-mode.
+ *
+ * Memory slot State.
+ * -----------------------------
+ *	The state of a memory slot mirrors the state of the
+ *	VM the memory slot is associated with.
+ *
+ * VM State transition.
+ * --------------------
+ *
+ *  A VM always starts in Normal Mode.
+ *
+ *  H_SVM_INIT_START moves the VM into transient state. During this
+ *  time the Ultravisor may request some of its GFNs to be shared or
+ *  secured. So its GFNs can be in one of the three GFN states.
+ *
+ *  H_SVM_INIT_DONE moves the VM entirely from transient state to
+ *  secure-state. At this point any left-over normal-GFNs are
+ *  transitioned to Secure-GFN.
+ *
+ *  H_SVM_INIT_ABORT moves the transient VM back to normal VM.
+ *  All its GFNs are moved to Normal-GFNs.
+ *
+ *  UV_TERMINATE transitions the secure-VM back to normal-VM. All
+ *  the secure-GFN and shared-GFNs are tranistioned to normal-GFN
+ *  Note: The contents of the normal-GFN is undefined at this point.
+ *
+ * GFN state implementation:
+ * -------------------------
+ *
+ * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
+ * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
+ * set, and contains the value of the secure-PFN.
+ * It is associated with a normal-PFN; also called mem_pfn, when
+ * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
+ * The value of the normal-PFN is not tracked.
+ *
+ * Shared GFN is associated with a normal-PFN. Its pfn[] has
+ * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
+ * is not tracked.
+ *
+ * Normal GFN is associated with normal-PFN. Its pfn[] has
+ * no flag set. The value of the normal-PFN is not tracked.
+ *
+ * Life cycle of a GFN
+ * --------------------
+ *
+ * --------------------------------------------------------------
+ * |        |     Share  |  Unshare | SVM       |H_SVM_INIT_DONE|
+ * |        |operation   |operation | abort/    |               |
+ * |        |            |          | terminate |               |
+ * -------------------------------------------------------------
+ * |        |            |          |           |               |
+ * | Secure |     Shared | Secure   |Normal     |Secure         |
+ * |        |            |          |           |               |
+ * | Shared |     Shared | Secure   |Normal     |Shared         |
+ * |        |            |          |           |               |
+ * | Normal |     Shared | Secure   |Normal     |Secure         |
+ * --------------------------------------------------------------
+ *
+ * Life cycle of a VM
+ * --------------------
+ *
+ * --------------------------------------------------------------------
+ * |         |  start    |  H_SVM_  |H_SVM_   |H_SVM_     |UV_SVM_    |
+ * |         |  VM       |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE  |
+ * |         |           |          |         |           |           |
+ * --------- ----------------------------------------------------------
+ * |         |           |          |         |           |           |
+ * | Normal  | Normal    | Transient|Error    |Error      |Normal     |
+ * |         |           |          |         |           |           |
+ * | Secure  |   Error   | Error    |Error    |Error      |Normal     |
+ * |         |           |          |         |           |           |
+ * |Transient|   N/A     | Error    |Secure   |Normal     |Normal     |
+ * --------------------------------------------------------------------
+ */
+
+#define KVMPPC_GFN_UVMEM_PFN	(1UL << 63)
+#define KVMPPC_GFN_MEM_PFN	(1UL << 62)
+#define KVMPPC_GFN_SHARED	(1UL << 61)
+#define KVMPPC_GFN_SECURE	(KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)
+#define KVMPPC_GFN_FLAG_MASK	(KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)
+#define KVMPPC_GFN_PFN_MASK	(~KVMPPC_GFN_FLAG_MASK)
 
 struct kvmppc_uvmem_slot {
 	struct list_head list;
@@ -106,11 +226,11 @@ struct kvmppc_uvmem_slot {
 	unsigned long base_pfn;
 	unsigned long *pfns;
 };
-
 struct kvmppc_uvmem_page_pvt {
 	struct kvm *kvm;
 	unsigned long gpa;
 	bool skip_page_out;
+	bool purge_gfn;
 };
 
 int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
@@ -154,8 +274,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
 	mutex_unlock(&kvm->arch.uvmem_lock);
 }
 
-static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
-				    struct kvm *kvm)
+static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
+			unsigned long flag, unsigned long uvmem_pfn)
 {
 	struct kvmppc_uvmem_slot *p;
 
@@ -163,24 +283,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
 		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
 			unsigned long index = gfn - p->base_pfn;
 
-			p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
+			if (flag == KVMPPC_GFN_UVMEM_PFN)
+				p->pfns[index] = uvmem_pfn | flag;
+			else
+				p->pfns[index] = flag;
 			return;
 		}
 	}
 }
 
-static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
+/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */
+static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
+			unsigned long uvmem_pfn, struct kvm *kvm)
 {
-	struct kvmppc_uvmem_slot *p;
+	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
+}
 
-	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
-		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
-			p->pfns[gfn - p->base_pfn] = 0;
-			return;
-		}
-	}
+/* mark the GFN as secure-GFN associated with a memory-PFN. */
+static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
 }
 
+/* mark the GFN as a shared GFN. */
+static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
+}
+
+/* mark the GFN as a non-existent GFN. */
+static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, 0, 0);
+}
+
+/* return true, if the GFN is a secure-GFN backed by a secure-PFN */
 static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
 				    unsigned long *uvmem_pfn)
 {
@@ -190,10 +327,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
 		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
 			unsigned long index = gfn - p->base_pfn;
 
-			if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
+			if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) {
 				if (uvmem_pfn)
 					*uvmem_pfn = p->pfns[index] &
-						     ~KVMPPC_UVMEM_PFN;
+						     KVMPPC_GFN_PFN_MASK;
 				return true;
 			} else
 				return false;
@@ -257,9 +394,13 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
  * is HV side fault on these pages. Next we *get* these pages, forcing
  * fault on them, do fault time migration to replace the device PTEs in
  * QEMU page table with normal PTEs from newly allocated pages.
+ *
+ * if @purge_gfn is set, invalidate the GFN. GFN is not shared nor secure
+ * anymore.
  */
 void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
-			     struct kvm *kvm, bool skip_page_out)
+			     struct kvm *kvm, bool skip_page_out,
+			     bool purge_gfn)
 {
 	int i;
 	struct kvmppc_uvmem_page_pvt *pvt;
@@ -270,14 +411,17 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
 		struct page *uvmem_page;
 
 		mutex_lock(&kvm->arch.uvmem_lock);
+
 		if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+			if (purge_gfn)
+				kvmppc_gfn_remove(gfn, kvm);
 			mutex_unlock(&kvm->arch.uvmem_lock);
 			continue;
 		}
-
 		uvmem_page = pfn_to_page(uvmem_pfn);
 		pvt = uvmem_page->zone_device_data;
 		pvt->skip_page_out = skip_page_out;
+		pvt->purge_gfn = purge_gfn;
 		mutex_unlock(&kvm->arch.uvmem_lock);
 
 		pfn = gfn_to_pfn(kvm, gfn);
@@ -305,7 +449,7 @@ unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 
 	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
-		kvmppc_uvmem_drop_pages(memslot, kvm, false);
+		kvmppc_uvmem_drop_pages(memslot, kvm, false, true);
 
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
@@ -347,7 +491,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
 		goto out_clear;
 
 	uvmem_pfn = bit + pfn_first;
-	kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+	kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
 
 	pvt->gpa = gpa;
 	pvt->kvm = kvm;
@@ -454,6 +598,7 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
 		uvmem_page = pfn_to_page(uvmem_pfn);
 		pvt = uvmem_page->zone_device_data;
 		pvt->skip_page_out = true;
+		pvt->purge_gfn = false;
 	}
 
 retry:
@@ -467,12 +612,16 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
 		uvmem_page = pfn_to_page(uvmem_pfn);
 		pvt = uvmem_page->zone_device_data;
 		pvt->skip_page_out = true;
+		pvt->purge_gfn = false;
 		kvm_release_pfn_clean(pfn);
 		goto retry;
 	}
 
-	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
+	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+				page_shift)) {
+		kvmppc_gfn_shared(gfn, kvm);
 		ret = H_SUCCESS;
+	}
 	kvm_release_pfn_clean(pfn);
 	mutex_unlock(&kvm->arch.uvmem_lock);
 out:
@@ -530,6 +679,7 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
 	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
 				&downgrade))
 		ret = H_SUCCESS;
+
 out_unlock:
 	mutex_unlock(&kvm->arch.uvmem_lock);
 out:
@@ -655,7 +805,10 @@ static void kvmppc_uvmem_page_free(struct page *page)
 
 	pvt = page->zone_device_data;
 	page->zone_device_data = NULL;
-	kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+	if (pvt->purge_gfn)
+		kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+	else
+		kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
 	kfree(pvt);
 }
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v2 1/4] KVM: PPC: Book3S HV: Fix function definition in book3s_hv_uvmem.c
From: Ram Pai @ 2020-06-18  9:19 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: ldufour, linuxram, cclaudio, bharata, sathnaga, aneesh.kumar,
	sukadev, bauerman, david
In-Reply-To: <1592471945-24786-1-git-send-email-linuxram@us.ibm.com>

Without this fix, git is confused. It generates wrong
function context for code changes in subsequent patches.
Weird, but true.

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Bharata B Rao <bharata@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Claudio Carvalho <cclaudio@linux.ibm.com>
Cc: kvm-ppc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
 arch/powerpc/kvm/book3s_hv_uvmem.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index ad950f89..3599aaa 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -369,8 +369,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  * Alloc a PFN from private device memory pool and copy page from normal
  * memory to secure memory using UV_PAGE_IN uvcall.
  */
-static int
-kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
+static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
 		   unsigned long end, unsigned long gpa, struct kvm *kvm,
 		   unsigned long page_shift, bool *downgrade)
 {
@@ -437,8 +436,8 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  * In the former case, uses dev_pagemap_ops.migrate_to_ram handler
  * to unmap the device page from QEMU's page tables.
  */
-static unsigned long
-kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
+static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
+		unsigned long page_shift)
 {
 
 	int ret = H_PARAMETER;
@@ -487,9 +486,9 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  * H_PAGE_IN_SHARED flag makes the page shared which means that the same
  * memory in is visible from both UV and HV.
  */
-unsigned long
-kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
-		     unsigned long flags, unsigned long page_shift)
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+		unsigned long flags,
+		unsigned long page_shift)
 {
 	bool downgrade = false;
 	unsigned long start, end;
@@ -546,10 +545,10 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  * Provision a new page on HV side and copy over the contents
  * from secure memory using UV_PAGE_OUT uvcall.
  */
-static int
-kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
-		    unsigned long end, unsigned long page_shift,
-		    struct kvm *kvm, unsigned long gpa)
+static int kvmppc_svm_page_out(struct vm_area_struct *vma,
+		unsigned long start,
+		unsigned long end, unsigned long page_shift,
+		struct kvm *kvm, unsigned long gpa)
 {
 	unsigned long src_pfn, dst_pfn = 0;
 	struct migrate_vma mig;
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v2 0/4] Migrate non-migrated pages of a SVM.
From: Ram Pai @ 2020-06-18  9:19 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: ldufour, linuxram, cclaudio, bharata, sathnaga, aneesh.kumar,
	sukadev, bauerman, david

This patch series migrates the non-migrated pages of a SVM.
This is required when the UV calls H_SVM_INIT_DONE, and
when a memory-slot is hotplugged to a Secure VM.

Testing: Passed rigorous SVM reboot test using different
	sized SVMs.

Changelog:
	. fixed a bug observed by Bharata. Pages that
	where paged-in and later paged-out must also be
	skipped from migration during H_SVM_INIT_DONE.

Laurent Dufour (1):
  KVM: PPC: Book3S HV: migrate hot plugged memory

Ram Pai (3):
  KVM: PPC: Book3S HV: Fix function definition in book3s_hv_uvmem.c
  KVM: PPC: Book3S HV: track the state GFNs associated with secure VMs
  KVM: PPC: Book3S HV: migrate remaining normal-GFNs to secure-GFNs in
    H_SVM_INIT_DONE

 Documentation/powerpc/ultravisor.rst        |   2 +
 arch/powerpc/include/asm/kvm_book3s_uvmem.h |   8 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c      |   2 +-
 arch/powerpc/kvm/book3s_hv.c                |  12 +-
 arch/powerpc/kvm/book3s_hv_uvmem.c          | 449 ++++++++++++++++++++++------
 5 files changed, 368 insertions(+), 105 deletions(-)

-- 
1.8.3.1


^ permalink raw reply

* Re: [PATCH] mm: Move p?d_alloc_track to separate header file
From: Mike Rapoport @ 2020-06-18  9:17 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-arch, Stephen Rothwell, jroedel, linux-mm, peterz,
	Linus Torvalds, Joerg Roedel, linuxppc-dev, Steven Rostedt,
	linux-kernel, Abdul Haleem, linux-next, Satheesh Rajendran,
	Andy Lutomirski, manvanth, hch
In-Reply-To: <20200617181226.ab213ea1531b5dd6eca1b0b6@linux-foundation.org>

On Wed, Jun 17, 2020 at 06:12:26PM -0700, Andrew Morton wrote:
> On Tue,  9 Jun 2020 14:05:33 +0200 Joerg Roedel <joro@8bytes.org> wrote:
> 
> > From: Joerg Roedel <jroedel@suse.de>
> > 
> > The functions are only used in two source files, so there is no need
> > for them to be in the global <linux/mm.h> header. Move them to the new
> > <linux/pgalloc-track.h> header and include it only where needed.
> > 
> > ...
> >
> > new file mode 100644
> > index 000000000000..1dcc865029a2
> > --- /dev/null
> > +++ b/include/linux/pgalloc-track.h
> > @@ -0,0 +1,51 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef _LINUX_PGALLLC_TRACK_H
> > +#define _LINUX_PGALLLC_TRACK_H
> 
> hm, no #includes.  I guess this is OK, given the limited use.
> 
> But it does make one wonder whether ioremap.c should be moved from lib/
> to mm/ and this file should be moved from include/linux/ to mm/.

It makes sense, but I am anyway planning consolidation of pgalloc.h, so
most probably pgalloc-track will not survive until 5.9-rc1 :)

If you think that it worth moving ioremap.c to mm/ regardless of chrun,
I can send a patch for that.

> Oh well.

-- 
Sincerely yours,
Mike.

^ permalink raw reply

* Re: [PATCH V3 (RESEND) 0/3] arm64: Enable vmemmap mapping from device memory
From: Mike Rapoport @ 2020-06-18  8:56 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Mark Rutland, Michal Hocko, linux-ia64, David Hildenbrand,
	Peter Zijlstra, Dave Hansen, linux-mm, Paul Mackerras,
	linux-riscv, Will Deacon, Thomas Gleixner, x86,
	Matthew Wilcox (Oracle), Ingo Molnar, Catalin Marinas, Fenghua Yu,
	Pavel Tatashin, Andy Lutomirski, Paul Walmsley, Dan Williams,
	linux-arm-kernel, Tony Luck, linux-kernel, Palmer Dabbelt,
	Andrew Morton, linuxppc-dev, Kirill A. Shutemov
In-Reply-To: <1592442930-9380-1-git-send-email-anshuman.khandual@arm.com>

On Thu, Jun 18, 2020 at 06:45:27AM +0530, Anshuman Khandual wrote:
> This series enables vmemmap backing memory allocation from device memory
> ranges on arm64. But before that, it enables vmemmap_populate_basepages()
> and vmemmap_alloc_block_buf() to accommodate struct vmem_altmap based
> alocation requests.
> 
> This series applies on 5.8-rc1.
> 
> Pending Question:
> 
> altmap_alloc_block_buf() does not have any other remaining users in
> the tree after this change. Should it be converted into a static
> function and it's declaration be dropped from the header
> (include/linux/mm.h). Avoided doing so because I was not sure if there
> are any off-tree users or not.

Well, off-tree users probably have an active fork anyway so they could
switch to vmemmap_alloc_block_buf()...

Regardless, can you please update Documentation/vm/memory-model.rst to
keep it in sync with the code?

> Changes in V3:
> 
> - Dropped comment from free_hotplug_page_range() per Robin
> - Modified comment in unmap_hotplug_range() per Robin
> - Enabled altmap support in vmemmap_alloc_block_buf() per Robin
> 
> Changes in V2: (https://lkml.org/lkml/2020/3/4/475)
> 
> - Rebased on latest hot-remove series (v14) adding P4D page table support
> 
> Changes in V1: (https://lkml.org/lkml/2020/1/23/12)
> 
> - Added an WARN_ON() in unmap_hotplug_range() when altmap is
>   provided without the page table backing memory being freed
> 
> Changes in RFC V2: (https://lkml.org/lkml/2019/10/21/11)
> 
> - Changed the commit message on 1/2 patch per Will
> - Changed the commit message on 2/2 patch as well
> - Rebased on arm64 memory hot remove series (v10)
> 
> RFC V1: (https://lkml.org/lkml/2019/6/28/32)
> 
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Paul Walmsley <paul.walmsley@sifive.com>
> Cc: Palmer Dabbelt <palmer@dabbelt.com>
> Cc: Tony Luck <tony.luck@intel.com>
> Cc: Fenghua Yu <fenghua.yu@intel.com>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Mike Rapoport <rppt@linux.ibm.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-ia64@vger.kernel.org
> Cc: linux-riscv@lists.infradead.org
> Cc: x86@kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-mm@kvack.org
> Cc: linux-kernel@vger.kernel.org
> 
> Anshuman Khandual (3):
>   mm/sparsemem: Enable vmem_altmap support in vmemmap_populate_basepages()
>   mm/sparsemem: Enable vmem_altmap support in vmemmap_alloc_block_buf()
>   arm64/mm: Enable vmem_altmap support for vmemmap mappings
> 
>  arch/arm64/mm/mmu.c       | 59 ++++++++++++++++++++++++++-------------
>  arch/ia64/mm/discontig.c  |  2 +-
>  arch/powerpc/mm/init_64.c | 10 +++----
>  arch/riscv/mm/init.c      |  2 +-
>  arch/x86/mm/init_64.c     | 12 ++++----
>  include/linux/mm.h        |  8 ++++--
>  mm/sparse-vmemmap.c       | 38 ++++++++++++++++++++-----
>  7 files changed, 87 insertions(+), 44 deletions(-)
> 
> -- 
> 2.20.1
> 

-- 
Sincerely yours,
Mike.

^ permalink raw reply

* Re: [PATCH v2 02/12] ocxl: Change type of pasid to unsigned int
From: Frederic Barrat @ 2020-06-18  8:05 UTC (permalink / raw)
  To: Fenghua Yu, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	H Peter Anvin, David Woodhouse, Lu Baolu, Andrew Donnellan,
	Felix Kuehling, Joerg Roedel, Dave Hansen, Tony Luck, Ashok Raj,
	Jacob Jun Pan, Dave Jiang, Yu-cheng Yu, Sohil Mehta,
	Ravi V Shankar
  Cc: iommu, x86, linuxppc-dev, linux-kernel, amd-gfx
In-Reply-To: <1592008893-9388-3-git-send-email-fenghua.yu@intel.com>



Le 13/06/2020 à 02:41, Fenghua Yu a écrit :
> PASID is defined as "int" although it's a 20-bit value and shouldn't be
> negative int. To be consistent with type defined in iommu, define PASID
> as "unsigned int".


It looks like this patch was considered because of the use of 'pasid' in 
variable or function names. The ocxl driver only makes sense on powerpc 
and shouldn't compile on anything else, so it's probably useless in the 
context of that series.
The pasid here is defined by the opencapi specification 
(https://opencapi.org), it is borrowed from the PCI world and you could 
argue it could be an unsigned int. But then I think the patch doesn't go 
far enough. But considering it's not used on x86, I think this patch can 
be dropped.

   Fred



> Suggested-by: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
> Reviewed-by: Tony Luck <tony.luck@intel.com>
> ---
> v2:
> - Create this new patch to define PASID as "unsigned int" consistently in
>    ocxl (Thomas)
> 
>   drivers/misc/ocxl/config.c        |  3 ++-
>   drivers/misc/ocxl/link.c          |  6 +++---
>   drivers/misc/ocxl/ocxl_internal.h |  6 +++---
>   drivers/misc/ocxl/pasid.c         |  2 +-
>   drivers/misc/ocxl/trace.h         | 20 ++++++++++----------
>   include/misc/ocxl.h               |  6 +++---
>   6 files changed, 22 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
> index c8e19bfb5ef9..22d034caed3d 100644
> --- a/drivers/misc/ocxl/config.c
> +++ b/drivers/misc/ocxl/config.c
> @@ -806,7 +806,8 @@ int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
>   }
>   EXPORT_SYMBOL_GPL(ocxl_config_set_TL);
>   
> -int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
> +int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control,
> +				unsigned int pasid)
>   {
>   	u32 val;
>   	unsigned long timeout;
> diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
> index 58d111afd9f6..931f6ae022db 100644
> --- a/drivers/misc/ocxl/link.c
> +++ b/drivers/misc/ocxl/link.c
> @@ -492,7 +492,7 @@ static u64 calculate_cfg_state(bool kernel)
>   	return state;
>   }
>   
> -int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
> +int ocxl_link_add_pe(void *link_handle, unsigned int pasid, u32 pidr, u32 tidr,
>   		u64 amr, struct mm_struct *mm,
>   		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
>   		void *xsl_err_data)
> @@ -572,7 +572,7 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
>   }
>   EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
>   
> -int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
> +int ocxl_link_update_pe(void *link_handle, unsigned int pasid, __u16 tid)
>   {
>   	struct ocxl_link *link = (struct ocxl_link *) link_handle;
>   	struct spa *spa = link->spa;
> @@ -608,7 +608,7 @@ int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
>   	return rc;
>   }
>   
> -int ocxl_link_remove_pe(void *link_handle, int pasid)
> +int ocxl_link_remove_pe(void *link_handle, unsigned int pasid)
>   {
>   	struct ocxl_link *link = (struct ocxl_link *) link_handle;
>   	struct spa *spa = link->spa;
> diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
> index 345bf843a38e..3ca982ba7472 100644
> --- a/drivers/misc/ocxl/ocxl_internal.h
> +++ b/drivers/misc/ocxl/ocxl_internal.h
> @@ -41,7 +41,7 @@ struct ocxl_afu {
>   	struct ocxl_afu_config config;
>   	int pasid_base;
>   	int pasid_count; /* opened contexts */
> -	int pasid_max; /* maximum number of contexts */
> +	unsigned int pasid_max; /* maximum number of contexts */
>   	int actag_base;
>   	int actag_enabled;
>   	struct mutex contexts_lock;
> @@ -69,7 +69,7 @@ struct ocxl_xsl_error {
>   
>   struct ocxl_context {
>   	struct ocxl_afu *afu;
> -	int pasid;
> +	unsigned int pasid;
>   	struct mutex status_mutex;
>   	enum ocxl_context_status status;
>   	struct address_space *mapping;
> @@ -128,7 +128,7 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
>    * pasid: the PASID for the AFU context
>    * tid: the new thread id for the process element
>    */
> -int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
> +int ocxl_link_update_pe(void *link_handle, unsigned int pasid, __u16 tid);
>   
>   int ocxl_context_mmap(struct ocxl_context *ctx,
>   			struct vm_area_struct *vma);
> diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c
> index d14cb56e6920..a151fc8f0bec 100644
> --- a/drivers/misc/ocxl/pasid.c
> +++ b/drivers/misc/ocxl/pasid.c
> @@ -80,7 +80,7 @@ static void range_free(struct list_head *head, u32 start, u32 size,
>   
>   int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size)
>   {
> -	int max_pasid;
> +	unsigned int max_pasid;
>   
>   	if (fn->config.max_pasid_log < 0)
>   		return -ENOSPC;
> diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
> index 17e21cb2addd..019e2fc63b1d 100644
> --- a/drivers/misc/ocxl/trace.h
> +++ b/drivers/misc/ocxl/trace.h
> @@ -9,13 +9,13 @@
>   #include <linux/tracepoint.h>
>   
>   DECLARE_EVENT_CLASS(ocxl_context,
> -	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
> +	TP_PROTO(pid_t pid, void *spa, unsigned int pasid, u32 pidr, u32 tidr),
>   	TP_ARGS(pid, spa, pasid, pidr, tidr),
>   
>   	TP_STRUCT__entry(
>   		__field(pid_t, pid)
>   		__field(void*, spa)
> -		__field(int, pasid)
> +		__field(unsigned int, pasid)
>   		__field(u32, pidr)
>   		__field(u32, tidr)
>   	),
> @@ -38,21 +38,21 @@ DECLARE_EVENT_CLASS(ocxl_context,
>   );
>   
>   DEFINE_EVENT(ocxl_context, ocxl_context_add,
> -	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
> +	TP_PROTO(pid_t pid, void *spa, unsigned int pasid, u32 pidr, u32 tidr),
>   	TP_ARGS(pid, spa, pasid, pidr, tidr)
>   );
>   
>   DEFINE_EVENT(ocxl_context, ocxl_context_remove,
> -	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
> +	TP_PROTO(pid_t pid, void *spa, unsigned int pasid, u32 pidr, u32 tidr),
>   	TP_ARGS(pid, spa, pasid, pidr, tidr)
>   );
>   
>   TRACE_EVENT(ocxl_terminate_pasid,
> -	TP_PROTO(int pasid, int rc),
> +	TP_PROTO(unsigned int pasid, int rc),
>   	TP_ARGS(pasid, rc),
>   
>   	TP_STRUCT__entry(
> -		__field(int, pasid)
> +		__field(unsigned int, pasid)
>   		__field(int, rc)
>   	),
>   
> @@ -107,11 +107,11 @@ DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack,
>   );
>   
>   TRACE_EVENT(ocxl_afu_irq_alloc,
> -	TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq),
> +	TP_PROTO(unsigned int pasid, int irq_id, unsigned int virq, int hw_irq),
>   	TP_ARGS(pasid, irq_id, virq, hw_irq),
>   
>   	TP_STRUCT__entry(
> -		__field(int, pasid)
> +		__field(unsigned int, pasid)
>   		__field(int, irq_id)
>   		__field(unsigned int, virq)
>   		__field(int, hw_irq)
> @@ -133,11 +133,11 @@ TRACE_EVENT(ocxl_afu_irq_alloc,
>   );
>   
>   TRACE_EVENT(ocxl_afu_irq_free,
> -	TP_PROTO(int pasid, int irq_id),
> +	TP_PROTO(unsigned int pasid, int irq_id),
>   	TP_ARGS(pasid, irq_id),
>   
>   	TP_STRUCT__entry(
> -		__field(int, pasid)
> +		__field(unsigned int, pasid)
>   		__field(int, irq_id)
>   	),
>   
> diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
> index 06dd5839e438..5eca04c8da97 100644
> --- a/include/misc/ocxl.h
> +++ b/include/misc/ocxl.h
> @@ -429,7 +429,7 @@ int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec);
>    * desired AFU. It can be found in the AFU configuration
>    */
>   int ocxl_config_terminate_pasid(struct pci_dev *dev,
> -				int afu_control_offset, int pasid);
> +				int afu_control_offset, unsigned int pasid);
>   
>   /*
>    * Read the configuration space of a function and fill in a
> @@ -466,7 +466,7 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle);
>    * 'xsl_err_data' is an argument passed to the above callback, if
>    * defined
>    */
> -int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
> +int ocxl_link_add_pe(void *link_handle, unsigned int pasid, u32 pidr, u32 tidr,
>   		u64 amr, struct mm_struct *mm,
>   		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
>   		void *xsl_err_data);
> @@ -474,7 +474,7 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
>   /*
>    * Remove a Process Element from the Shared Process Area for a link
>    */
> -int ocxl_link_remove_pe(void *link_handle, int pasid);
> +int ocxl_link_remove_pe(void *link_handle, unsigned int pasid);
>   
>   /*
>    * Allocate an AFU interrupt associated to the link.
> 

^ permalink raw reply

* [V2 PATCH 3/3] Add support for arm64 to carry over IMA measurement logs
From: Prakhar Srivastava @ 2020-06-18  7:10 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, linuxppc-dev, devicetree,
	linux-integrity, linux-security-module
  Cc: kstewart, mark.rutland, catalin.marinas, bhsharma, tao.li, zohar,
	paulus, vincenzo.frascino, frowand.list, nramas, masahiroy,
	jmorris, takahiro.akashi, serge, pasha.tatashin, will, prsriva,
	robh+dt, hsinyi, tusharsu, tglx, allison, christophe.leroy,
	mbrugger, balajib, dmitry.kasatkin, james.morse, gregkh
In-Reply-To: <20200618071045.471131-1-prsriva@linux.microsoft.com>

Add support for arm64 to carry over IMA measurement logs.
Update arm64 code to call into functions made available in patch 1/3.

---
 arch/arm64/Kconfig                     |  1 +
 arch/arm64/include/asm/ima.h           | 17 ++++++++++
 arch/arm64/include/asm/kexec.h         |  3 ++
 arch/arm64/kernel/machine_kexec_file.c | 47 +++++++++++++++++++++-----
 4 files changed, 60 insertions(+), 8 deletions(-)
 create mode 100644 arch/arm64/include/asm/ima.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d513f461957..3d544e2e25e6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1070,6 +1070,7 @@ config KEXEC
 config KEXEC_FILE
 	bool "kexec file based system call"
 	select KEXEC_CORE
+	select HAVE_IMA_KEXEC
 	help
 	  This is new version of kexec system call. This system call is
 	  file based and takes file descriptors as system call argument
diff --git a/arch/arm64/include/asm/ima.h b/arch/arm64/include/asm/ima.h
new file mode 100644
index 000000000000..70ac39b74607
--- /dev/null
+++ b/arch/arm64/include/asm/ima.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARCH_IMA_H
+#define _ASM_ARCH_IMA_H
+
+struct kimage;
+
+#ifdef CONFIG_IMA_KEXEC
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+			      size_t size);
+#else
+static inline int arch_ima_add_kexec_buffer(struct kimage *image,
+			unsigned long load_addr, size_t size)
+{
+	return 0;
+}
+#endif /* CONFIG_IMA_KEXEC */
+#endif /* _ASM_ARCH_IMA_H */
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index d24b527e8c00..7bd60c185ad3 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -100,6 +100,9 @@ struct kimage_arch {
 	void *elf_headers;
 	unsigned long elf_headers_mem;
 	unsigned long elf_headers_sz;
+
+	phys_addr_t ima_buffer_addr;
+	size_t ima_buffer_size;
 };
 
 extern const struct kexec_file_ops kexec_image_ops;
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index b40c3b0def92..1e9007c926db 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -24,20 +24,37 @@
 #include <asm/byteorder.h>
 
 /* relevant device tree properties */
-#define FDT_PROP_KEXEC_ELFHDR	"linux,elfcorehdr"
-#define FDT_PROP_MEM_RANGE	"linux,usable-memory-range"
-#define FDT_PROP_INITRD_START	"linux,initrd-start"
-#define FDT_PROP_INITRD_END	"linux,initrd-end"
-#define FDT_PROP_BOOTARGS	"bootargs"
-#define FDT_PROP_KASLR_SEED	"kaslr-seed"
-#define FDT_PROP_RNG_SEED	"rng-seed"
-#define RNG_SEED_SIZE		128
+#define FDT_PROP_KEXEC_ELFHDR		"linux,elfcorehdr"
+#define FDT_PROP_MEM_RANGE		"linux,usable-memory-range"
+#define FDT_PROP_INITRD_START		"linux,initrd-start"
+#define FDT_PROP_INITRD_END		"linux,initrd-end"
+#define FDT_PROP_BOOTARGS		"bootargs"
+#define FDT_PROP_KASLR_SEED		"kaslr-seed"
+#define FDT_PROP_RNG_SEED		"rng-seed"
+#define FDT_PROP_IMA_KEXEC_BUFFER	"linux,ima-kexec-buffer"
+#define RNG_SEED_SIZE			128
 
 const struct kexec_file_ops * const kexec_file_loaders[] = {
 	&kexec_image_ops,
 	NULL
 };
 
+/**
+ * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
+ *
+ * Architectures should use this function to pass on the IMA buffer
+ * information to the next kernel.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+			      size_t size)
+{
+	image->arch.ima_buffer_addr = load_addr;
+	image->arch.ima_buffer_size = size;
+	return 0;
+}
+
 int arch_kimage_file_post_load_cleanup(struct kimage *image)
 {
 	vfree(image->arch.dtb);
@@ -66,6 +83,9 @@ static int setup_dtb(struct kimage *image,
 	if (ret && ret != -FDT_ERR_NOTFOUND)
 		goto out;
 	ret = fdt_delprop(dtb, off, FDT_PROP_MEM_RANGE);
+	if (ret && ret != -FDT_ERR_NOTFOUND)
+		goto out;
+	ret = fdt_delprop(dtb, off, FDT_PROP_IMA_KEXEC_BUFFER);
 	if (ret && ret != -FDT_ERR_NOTFOUND)
 		goto out;
 
@@ -119,6 +139,17 @@ static int setup_dtb(struct kimage *image,
 			goto out;
 	}
 
+	if (image->arch.ima_buffer_size > 0) {
+
+		ret = fdt_appendprop_addrrange(dtb, 0, off,
+				FDT_PROP_IMA_KEXEC_BUFFER,
+				image->arch.ima_buffer_addr,
+				image->arch.ima_buffer_size);
+		if (ret)
+			return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+
+	}
+
 	/* add kaslr-seed */
 	ret = fdt_delprop(dtb, off, FDT_PROP_KASLR_SEED);
 	if (ret == -FDT_ERR_NOTFOUND)
-- 
2.25.1


^ permalink raw reply related

* [V2 PATCH 2/3] dt-bindings: chosen: Document ima-kexec-buffer
From: Prakhar Srivastava @ 2020-06-18  7:10 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, linuxppc-dev, devicetree,
	linux-integrity, linux-security-module
  Cc: kstewart, mark.rutland, catalin.marinas, bhsharma, tao.li, zohar,
	paulus, vincenzo.frascino, frowand.list, nramas, masahiroy,
	jmorris, takahiro.akashi, serge, pasha.tatashin, will, prsriva,
	robh+dt, hsinyi, tusharsu, tglx, allison, christophe.leroy,
	mbrugger, balajib, dmitry.kasatkin, james.morse, gregkh
In-Reply-To: <20200618071045.471131-1-prsriva@linux.microsoft.com>

Integrity measurement architecture(IMA) validates if files
have been accidentally or maliciously altered, both remotely and
locally, appraise a file's measurement against a "good" value stored
as an extended attribute, and enforce local file integrity.

IMA also measures singatures of kernel and initrd during kexec along with
the command line used for kexec.
These measurements are critical to verify the seccurity posture of the OS.

Resering memory and adding the memory information to a device tree node
acts as the mechanism to carry over IMA measurement logs.

Update devicetree documentation to reflect the addition of new property
under the chosen node. 

---
 Documentation/devicetree/bindings/chosen.txt | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Documentation/devicetree/bindings/chosen.txt b/Documentation/devicetree/bindings/chosen.txt
index 45e79172a646..a15f70c007ef 100644
--- a/Documentation/devicetree/bindings/chosen.txt
+++ b/Documentation/devicetree/bindings/chosen.txt
@@ -135,3 +135,20 @@ e.g.
 		linux,initrd-end = <0x82800000>;
 	};
 };
+
+linux,ima-kexec-buffer
+----------------------
+
+This property(currently used by powerpc, arm64) holds the memory range,
+the address and the size, of the IMA measurement logs that are being carried
+over to the kexec session.
+
+/ {
+	chosen {
+		linux,ima-kexec-buffer = <0x9 0x82000000 0x0 0x00008000>;
+	};
+};
+
+This porperty does not represent real hardware, but the memory allocated for
+carrying the IMA measurement logs. The address and the suze are expressed in
+#address-cells and #size-cells, respectively of the root node.
-- 
2.25.1


^ permalink raw reply related

* [V2 PATCH 0/3] Adding support for carrying IMA measurement logs
From: Prakhar Srivastava @ 2020-06-18  7:10 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, linuxppc-dev, devicetree,
	linux-integrity, linux-security-module
  Cc: kstewart, mark.rutland, catalin.marinas, bhsharma, tao.li, zohar,
	paulus, vincenzo.frascino, frowand.list, nramas, masahiroy,
	jmorris, takahiro.akashi, serge, pasha.tatashin, will, prsriva,
	robh+dt, hsinyi, tusharsu, tglx, allison, christophe.leroy,
	mbrugger, balajib, dmitry.kasatkin, james.morse, gregkh

Integrgity Measurement Architecture(IMA) during kexec(kexec file load)
verifies the kernel signature and measures the signature of the kernel.

The signature in the measuremnt logs is used to verfiy the 
authenticity of the kernel in the subsequent kexec'd session, however in
the current implementation IMA measurement logs are not carried over thus
remote attesation cannot verify the signature of the running kernel.

Adding support to arm64 to carry over the IMA measurement logs over kexec.

Add a new chosen node entry linux,ima-kexec-buffer to hold the address and
the size of the memory reserved to carry the IMA measurement log.
Refactor existing powerpc code to be used by amr64 as well.  

Changelog:

v2:
  Break patches into separate patches.
  - Powerpc related Refactoring
  - Updating the docuemntation for chosen node
  - Updating arm64 to support IMA buffer pass

v1:
  Refactoring carrying over IMA measuremnet logs over Kexec. This patch
    moves the non-architecture specific code out of powerpc and adds to
    security/ima.(Suggested by Thiago)
  Add Documentation regarding the ima-kexec-buffer node in the chosen
    node documentation

v0:
  Add a layer of abstraction to use the memory reserved by device tree
    for ima buffer pass.
  Add support for ima buffer pass using reserved memory for arm64 kexec.
    Update the arch sepcific code path in kexec file load to store the
    ima buffer in the reserved memory. The same reserved memory is read
    on kexec or cold boot.

Prakhar Srivastava (3):
  Refactoring powerpc code for carrying over IMA measurement logs, to
    move non architecture specific code to security/ima.
  dt-bindings: chosen: Document ima-kexec-buffer carrying over IMA
    measuremnt logs over kexec.
  Add support for arm64 to carry over IMA measurement logs

 Documentation/devicetree/bindings/chosen.txt |  17 +++
 arch/arm64/Kconfig                           |   1 +
 arch/arm64/include/asm/ima.h                 |  17 +++
 arch/arm64/include/asm/kexec.h               |   3 +
 arch/arm64/kernel/machine_kexec_file.c       |  47 +++++--
 arch/powerpc/include/asm/ima.h               |  10 --
 arch/powerpc/kexec/ima.c                     | 126 ++-----------------
 security/integrity/ima/ima_kexec.c           | 116 +++++++++++++++++
 8 files changed, 201 insertions(+), 136 deletions(-)
 create mode 100644 arch/arm64/include/asm/ima.h

-- 
2.25.1


^ permalink raw reply

* [V2 PATCH 1/3] Refactoring powerpc code for carrying over IMA measurement logs, to move non architecture specific code to security/ima.
From: Prakhar Srivastava @ 2020-06-18  7:10 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, linuxppc-dev, devicetree,
	linux-integrity, linux-security-module
  Cc: kstewart, mark.rutland, catalin.marinas, bhsharma, tao.li, zohar,
	paulus, vincenzo.frascino, frowand.list, nramas, masahiroy,
	jmorris, takahiro.akashi, serge, pasha.tatashin, will, prsriva,
	robh+dt, hsinyi, tusharsu, tglx, allison, christophe.leroy,
	mbrugger, balajib, dmitry.kasatkin, james.morse, gregkh
In-Reply-To: <20200618071045.471131-1-prsriva@linux.microsoft.com>

Powerpc has support to carry over the IMA measurement logs. Refatoring the 
non-architecture specific code out of arch/powerpc and into security/ima.

The code adds support for reserving and freeing up of memory for IMA measurement
logs.

---
 arch/powerpc/include/asm/ima.h     |  10 ---
 arch/powerpc/kexec/ima.c           | 126 ++---------------------------
 security/integrity/ima/ima_kexec.c | 116 ++++++++++++++++++++++++++
 3 files changed, 124 insertions(+), 128 deletions(-)

diff --git a/arch/powerpc/include/asm/ima.h b/arch/powerpc/include/asm/ima.h
index ead488cf3981..c29ec86498f8 100644
--- a/arch/powerpc/include/asm/ima.h
+++ b/arch/powerpc/include/asm/ima.h
@@ -4,15 +4,6 @@
 
 struct kimage;
 
-int ima_get_kexec_buffer(void **addr, size_t *size);
-int ima_free_kexec_buffer(void);
-
-#ifdef CONFIG_IMA
-void remove_ima_buffer(void *fdt, int chosen_node);
-#else
-static inline void remove_ima_buffer(void *fdt, int chosen_node) {}
-#endif
-
 #ifdef CONFIG_IMA_KEXEC
 int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
 			      size_t size);
@@ -22,7 +13,6 @@ int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node);
 static inline int setup_ima_buffer(const struct kimage *image, void *fdt,
 				   int chosen_node)
 {
-	remove_ima_buffer(fdt, chosen_node);
 	return 0;
 }
 #endif /* CONFIG_IMA_KEXEC */
diff --git a/arch/powerpc/kexec/ima.c b/arch/powerpc/kexec/ima.c
index 720e50e490b6..6054ce91d2a6 100644
--- a/arch/powerpc/kexec/ima.c
+++ b/arch/powerpc/kexec/ima.c
@@ -12,121 +12,6 @@
 #include <linux/memblock.h>
 #include <linux/libfdt.h>
 
-static int get_addr_size_cells(int *addr_cells, int *size_cells)
-{
-	struct device_node *root;
-
-	root = of_find_node_by_path("/");
-	if (!root)
-		return -EINVAL;
-
-	*addr_cells = of_n_addr_cells(root);
-	*size_cells = of_n_size_cells(root);
-
-	of_node_put(root);
-
-	return 0;
-}
-
-static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
-			       size_t *size)
-{
-	int ret, addr_cells, size_cells;
-
-	ret = get_addr_size_cells(&addr_cells, &size_cells);
-	if (ret)
-		return ret;
-
-	if (len < 4 * (addr_cells + size_cells))
-		return -ENOENT;
-
-	*addr = of_read_number(prop, addr_cells);
-	*size = of_read_number(prop + 4 * addr_cells, size_cells);
-
-	return 0;
-}
-
-/**
- * ima_get_kexec_buffer - get IMA buffer from the previous kernel
- * @addr:	On successful return, set to point to the buffer contents.
- * @size:	On successful return, set to the buffer size.
- *
- * Return: 0 on success, negative errno on error.
- */
-int ima_get_kexec_buffer(void **addr, size_t *size)
-{
-	int ret, len;
-	unsigned long tmp_addr;
-	size_t tmp_size;
-	const void *prop;
-
-	prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
-	if (!prop)
-		return -ENOENT;
-
-	ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
-	if (ret)
-		return ret;
-
-	*addr = __va(tmp_addr);
-	*size = tmp_size;
-
-	return 0;
-}
-
-/**
- * ima_free_kexec_buffer - free memory used by the IMA buffer
- */
-int ima_free_kexec_buffer(void)
-{
-	int ret;
-	unsigned long addr;
-	size_t size;
-	struct property *prop;
-
-	prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
-	if (!prop)
-		return -ENOENT;
-
-	ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
-	if (ret)
-		return ret;
-
-	ret = of_remove_property(of_chosen, prop);
-	if (ret)
-		return ret;
-
-	return memblock_free(addr, size);
-
-}
-
-/**
- * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
- *
- * The IMA measurement buffer is of no use to a subsequent kernel, so we always
- * remove it from the device tree.
- */
-void remove_ima_buffer(void *fdt, int chosen_node)
-{
-	int ret, len;
-	unsigned long addr;
-	size_t size;
-	const void *prop;
-
-	prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
-	if (!prop)
-		return;
-
-	ret = do_get_kexec_buffer(prop, len, &addr, &size);
-	fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
-	if (ret)
-		return;
-
-	ret = delete_fdt_mem_rsv(fdt, addr, size);
-	if (!ret)
-		pr_debug("Removed old IMA buffer reservation.\n");
-}
-
 #ifdef CONFIG_IMA_KEXEC
 /**
  * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
@@ -179,13 +64,18 @@ int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node)
 	int ret, addr_cells, size_cells, entry_size;
 	u8 value[16];
 
-	remove_ima_buffer(fdt, chosen_node);
 	if (!image->arch.ima_buffer_size)
 		return 0;
 
-	ret = get_addr_size_cells(&addr_cells, &size_cells);
-	if (ret)
+	ret = fdt_address_cells(fdt, chosen_node);
+	if (ret < 0)
+		return ret;
+	addr_cells = ret;
+
+	ret = fdt_size_cells(fdt, chosen_node);
+	if (ret < 0)
 		return ret;
+	size_cells = ret;
 
 	entry_size = 4 * (addr_cells + size_cells);
 
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index 121de3e04af2..e1e6d6154015 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -10,8 +10,124 @@
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
 #include <linux/kexec.h>
+#include <linux/of.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
 #include "ima.h"
 
+static int get_addr_size_cells(int *addr_cells, int *size_cells)
+{
+	struct device_node *root;
+
+	root = of_find_node_by_path("/");
+	if (!root)
+		return -EINVAL;
+
+	*addr_cells = of_n_addr_cells(root);
+	*size_cells = of_n_size_cells(root);
+
+	of_node_put(root);
+
+	return 0;
+}
+
+static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
+			       size_t *size)
+{
+	int ret, addr_cells, size_cells;
+
+	ret = get_addr_size_cells(&addr_cells, &size_cells);
+	if (ret)
+		return ret;
+
+	if (len < 4 * (addr_cells + size_cells))
+		return -ENOENT;
+
+	*addr = of_read_number(prop, addr_cells);
+	*size = of_read_number(prop + 4 * addr_cells, size_cells);
+
+	return 0;
+}
+
+/**
+ * ima_get_kexec_buffer - get IMA buffer from the previous kernel
+ * @addr:	On successful return, set to point to the buffer contents.
+ * @size:	On successful return, set to the buffer size.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int ima_get_kexec_buffer(void **addr, size_t *size)
+{
+	int ret, len;
+	unsigned long tmp_addr;
+	size_t tmp_size;
+	const void *prop;
+
+	prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
+	if (!prop)
+		return -ENOENT;
+
+	ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
+	if (ret)
+		return ret;
+
+	*addr = __va(tmp_addr);
+	*size = tmp_size;
+
+	return 0;
+}
+
+/**
+ * ima_free_kexec_buffer - free memory used by the IMA buffer
+ */
+int ima_free_kexec_buffer(void)
+{
+	int ret;
+	unsigned long addr;
+	size_t size;
+	struct property *prop;
+
+	prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
+	if (!prop)
+		return -ENOENT;
+
+	ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
+	if (ret)
+		return ret;
+
+	ret = of_remove_property(of_chosen, prop);
+	if (ret)
+		return ret;
+
+	return memblock_free(__pa(addr), size);
+
+}
+
+/**
+ * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
+ *
+ * The IMA measurement buffer is of no use to a subsequent kernel, so we always
+ * remove it from the device tree.
+ */
+void remove_ima_buffer(void *fdt, int chosen_node)
+{
+	int ret, len;
+	unsigned long addr;
+	size_t size;
+	const void *prop;
+
+	prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
+	if (!prop)
+		return;
+
+	do_get_kexec_buffer(prop, len, &addr, &size);
+	ret = fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
+	if (ret < 0)
+		return;
+
+	memblock_free(addr, size);
+}
+
 #ifdef CONFIG_IMA_KEXEC
 static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
 				     unsigned long segment_size)
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH v5 01/13] powerpc: Remove Xilinx PPC405/PPC440 support
From: Nathan Chancellor @ 2020-06-18  3:16 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Arnd Bergmann, Nick Desaulniers, Michal Simek, LKML,
	clang-built-linux, Paul Mackerras, linuxppc-dev
In-Reply-To: <87tuz9ci7e.fsf@mpe.ellerman.id.au>

On Thu, Jun 18, 2020 at 10:48:21AM +1000, Michael Ellerman wrote:
> Nick Desaulniers <ndesaulniers@google.com> writes:
> > On Wed, Jun 17, 2020 at 3:20 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> >> Michael Ellerman <mpe@ellerman.id.au> writes:
> >> > Michal Simek <michal.simek@xilinx.com> writes:
> >> <snip>
> >>
> >> >> Or if bamboo requires uImage to be built by default you can do it via
> >> >> Kconfig.
> >> >>
> >> >> diff --git a/arch/powerpc/platforms/44x/Kconfig
> >> >> b/arch/powerpc/platforms/44x/Kconfig
> >> >> index 39e93d23fb38..300864d7b8c9 100644
> >> >> --- a/arch/powerpc/platforms/44x/Kconfig
> >> >> +++ b/arch/powerpc/platforms/44x/Kconfig
> >> >> @@ -13,6 +13,7 @@ config BAMBOO
> >> >>         select PPC44x_SIMPLE
> >> >>         select 440EP
> >> >>         select FORCE_PCI
> >> >> +       select DEFAULT_UIMAGE
> >> >>         help
> >> >>           This option enables support for the IBM PPC440EP evaluation board.
> >> >
> >> > Who knows what the actual bamboo board used. But I'd be happy to take a
> >> > SOB'ed patch to do the above, because these days the qemu emulation is
> >> > much more likely to be used than the actual board.
> >>
> >> I just went to see why my CI boot of 44x didn't catch this, and it's
> >> because I don't use the uImage, I just boot the vmlinux directly:
> >>
> >>   $ qemu-system-ppc -M bamboo -m 128m -display none -kernel build~/vmlinux -append "console=ttyS0" -display none -nodefaults -serial mon:stdio
> >>   Linux version 5.8.0-rc1-00118-g69119673bd50 (michael@alpine1-p1) (gcc (Ubuntu 9.3.0-10ubuntu2) 9.3.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #4 Wed Jun 17 20:19:22 AEST 2020
> >>   Using PowerPC 44x Platform machine description
> >>   ioremap() called early from find_legacy_serial_ports+0x690/0x770. Use early_ioremap() instead
> >>   printk: bootconsole [udbg0] enabled
> >>
> >>
> >> So that's probably the simplest solution?
> >
> > If the uImage or zImage self decompresses, I would prefer to test that as well.
> 
> The uImage is decompressed by qemu AIUI.
> 
> >> That means previously arch/powerpc/boot/zImage was just a hardlink to
> >> the uImage:
> >
> > It sounds like we can just boot the zImage, or is that no longer
> > created with the uImage?
> 
> The zImage won't boot on bamboo.
> 
> Because of the vagaries of the arch/powerpc/boot/Makefile the zImage
> ends up pointing to treeImage.ebony, which is for a different board.
> 
> The zImage link is made to the first item in $(image-y):
> 
> $(obj)/zImage:		$(addprefix $(obj)/, $(image-y))
> 	$(Q)rm -f $@; ln $< $@
>                          ^
>                          first preqrequisite
> 
> Which for this defconfig happens to be:
> 
> image-$(CONFIG_EBONY)			+= treeImage.ebony cuImage.ebony
> 
> If you turned off CONFIG_EBONY then the zImage will be a link to
> treeImage.bamboo, but qemu can't boot that either.
> 
> It's kind of nuts that the zImage points to some arbitrary image
> depending on what's configured and the order of things in the Makefile.
> But I'm not sure how we make it less nuts without risking breaking
> people's existing setups.
> 
> cheers

Hi Michael,

For what it's worth, this is squared this away in terms of our CI by
just building and booting the uImage directly, rather than implicitly
using the zImage:

https://github.com/ClangBuiltLinux/continuous-integration/pull/282
https://github.com/ClangBuiltLinux/boot-utils/pull/22

We were only using the zImage because that is what Joel Stanley intially
set us up with when PowerPC 32-bit was added to our CI:

https://github.com/ClangBuiltLinux/continuous-integration/pull/100

Admittedly, we really do not have many PowerPC experts in our
organization so we are supporting it on a "best effort" basis, which
often involves using whatever knowledge is floating around or can be
gained from interactions such as this :) so thank you for that!

Cheers,
Nathan

^ permalink raw reply

* [powerpc:next-test] BUILD SUCCESS 9d7d80a443b962d67db4fbd4b832081f4d7df4a8
From: kernel test robot @ 2020-06-18  3:06 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next-test
branch HEAD: 9d7d80a443b962d67db4fbd4b832081f4d7df4a8  powerpc/powernv/ioda: Return correct error if TCE level allocation failed

elapsed time: 723m

configs tested: 5
configs skipped: 105

The following configs have been built successfully.
More configs may be tested in the coming days.

powerpc                             defconfig
powerpc                          allyesconfig
powerpc                          rhel-kconfig
powerpc                          allmodconfig
powerpc                           allnoconfig

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:fixes-test] BUILD SUCCESS b55129f97aeefd265314e12d98935330e011a14a
From: kernel test robot @ 2020-06-18  3:06 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  fixes-test
branch HEAD: b55129f97aeefd265314e12d98935330e011a14a  powerpc/8xx: Provide ptep_get() with 16k pages

elapsed time: 723m

configs tested: 94
configs skipped: 4

The following configs have been built successfully.
More configs may be tested in the coming days.

arm                                 defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                               allnoconfig
arm64                            allyesconfig
arm64                               defconfig
arm64                            allmodconfig
arm64                             allnoconfig
arm                        clps711x_defconfig
powerpc                      ep88xc_defconfig
arm                            xcep_defconfig
arm                          iop32x_defconfig
mips                           xway_defconfig
sparc                            alldefconfig
arm                         mv78xx0_defconfig
powerpc                     mpc83xx_defconfig
i386                             allyesconfig
i386                                defconfig
i386                              debian-10.3
i386                              allnoconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                              allnoconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                              allnoconfig
m68k                           sun3_defconfig
m68k                                defconfig
m68k                             allyesconfig
nds32                               defconfig
nds32                             allnoconfig
csky                             allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                              defconfig
xtensa                           allyesconfig
h8300                            allyesconfig
h8300                            allmodconfig
arc                                 defconfig
sh                               allmodconfig
sh                                allnoconfig
microblaze                        allnoconfig
arc                              allyesconfig
nios2                               defconfig
nios2                            allyesconfig
openrisc                            defconfig
c6x                              allyesconfig
c6x                               allnoconfig
openrisc                         allyesconfig
mips                             allyesconfig
mips                              allnoconfig
mips                             allmodconfig
parisc                            allnoconfig
parisc                              defconfig
parisc                           allyesconfig
parisc                           allmodconfig
powerpc                          rhel-kconfig
powerpc                             defconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
i386                 randconfig-a015-20200617
i386                 randconfig-a011-20200617
i386                 randconfig-a014-20200617
i386                 randconfig-a016-20200617
i386                 randconfig-a013-20200617
i386                 randconfig-a012-20200617
riscv                            allyesconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                            allmodconfig
s390                             allyesconfig
s390                              allnoconfig
s390                             allmodconfig
s390                                defconfig
sparc                            allyesconfig
sparc                               defconfig
sparc64                             defconfig
sparc64                           allnoconfig
sparc64                          allyesconfig
sparc64                          allmodconfig
um                               allmodconfig
um                                allnoconfig
um                               allyesconfig
um                                  defconfig
x86_64                                   rhel
x86_64                         rhel-7.2-clear
x86_64                                    lkp
x86_64                              fedora-25
x86_64                               rhel-8.3
x86_64                               rhel-7.6
x86_64                    rhel-7.6-kselftests
x86_64                                  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* linux-next: manual merge of the pidfd tree with the powerpc-fixes tree
From: Stephen Rothwell @ 2020-06-18  2:11 UTC (permalink / raw)
  To: Christian Brauner, Michael Ellerman, PowerPC
  Cc: Linux Next Mailing List, Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 1280 bytes --]

Hi all,

Today's linux-next merge of the pidfd tree got a conflict in:

  arch/powerpc/kernel/syscalls/syscall.tbl

between commit:

  35e32a6cb5f6 ("powerpc/syscalls: Split SPU-ness out of ABI")

from the powerpc-fixes tree and commit:

  9b4feb630e8e ("arch: wire-up close_range()")

from the pidfd tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc arch/powerpc/kernel/syscalls/syscall.tbl
index c0cdaacd770e,dd87a782d80e..000000000000
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@@ -480,6 -524,8 +480,7 @@@
  434	common	pidfd_open			sys_pidfd_open
  435	32	clone3				ppc_clone3			sys_clone3
  435	64	clone3				sys_clone3
 -435	spu	clone3				sys_ni_syscall
+ 436	common	close_range			sys_close_range
  437	common	openat2				sys_openat2
  438	common	pidfd_getfd			sys_pidfd_getfd
  439	common	faccessat2			sys_faccessat2

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* [PATCH AUTOSEL 4.4 52/60] ASoC: fsl_asrc_dma: Fix dma_chan leak when config DMA channel failed
From: Sasha Levin @ 2020-06-18  1:29 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, Xin Tan, Xiyu Yang, alsa-devel, Mark Brown,
	linuxppc-dev
In-Reply-To: <20200618013004.610532-1-sashal@kernel.org>

From: Xiyu Yang <xiyuyang19@fudan.edu.cn>

[ Upstream commit 36124fb19f1ae68a500cd76a76d40c6e81bee346 ]

fsl_asrc_dma_hw_params() invokes dma_request_channel() or
fsl_asrc_get_dma_channel(), which returns a reference of the specified
dma_chan object to "pair->dma_chan[dir]" with increased refcnt.

The reference counting issue happens in one exception handling path of
fsl_asrc_dma_hw_params(). When config DMA channel failed for Back-End,
the function forgets to decrease the refcnt increased by
dma_request_channel() or fsl_asrc_get_dma_channel(), causing a refcnt
leak.

Fix this issue by calling dma_release_channel() when config DMA channel
failed.

Signed-off-by: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Signed-off-by: Xin Tan <tanxin.ctf@gmail.com>
Link: https://lore.kernel.org/r/1590415966-52416-1-git-send-email-xiyuyang19@fudan.edu.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 sound/soc/fsl/fsl_asrc_dma.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c
index ffc000bc1f15..56a873ba08e4 100644
--- a/sound/soc/fsl/fsl_asrc_dma.c
+++ b/sound/soc/fsl/fsl_asrc_dma.c
@@ -243,6 +243,7 @@ static int fsl_asrc_dma_hw_params(struct snd_pcm_substream *substream,
 	ret = dmaengine_slave_config(pair->dma_chan[dir], &config_be);
 	if (ret) {
 		dev_err(dev, "failed to config DMA channel for Back-End\n");
+		dma_release_channel(pair->dma_chan[dir]);
 		return ret;
 	}
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.4 38/60] powerpc/pseries/ras: Fix FWNMI_VALID off by one
From: Sasha Levin @ 2020-06-18  1:29 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, linuxppc-dev, Mahesh Salgaonkar, Nicholas Piggin
In-Reply-To: <20200618013004.610532-1-sashal@kernel.org>

From: Nicholas Piggin <npiggin@gmail.com>

[ Upstream commit deb70f7a35a22dffa55b2c3aac71bc6fb0f486ce ]

This was discovered developing qemu fwnmi sreset support. This
off-by-one bug means the last 16 bytes of the rtas area can not
be used for a 16 byte save area.

It's not a serious bug, and QEMU implementation has to retain a
workaround for old kernels, but it's good to tighten it.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Link: https://lore.kernel.org/r/20200508043408.886394-7-npiggin@gmail.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/powerpc/platforms/pseries/ras.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 9795e52bab3d..9e817c1b7808 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -265,10 +265,11 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 /*
  * Some versions of FWNMI place the buffer inside the 4kB page starting at
  * 0x7000. Other versions place it inside the rtas buffer. We check both.
+ * Minimum size of the buffer is 16 bytes.
  */
 #define VALID_FWNMI_BUFFER(A) \
-	((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
-	(((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))
+	((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
+	(((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
 
 /*
  * Get the error information for errors coming through the
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.4 39/60] powerpc/ps3: Fix kexec shutdown hang
From: Sasha Levin @ 2020-06-18  1:29 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Geoff Levand, linuxppc-dev, Sasha Levin
In-Reply-To: <20200618013004.610532-1-sashal@kernel.org>

From: Geoff Levand <geoff@infradead.org>

[ Upstream commit 126554465d93b10662742128918a5fc338cda4aa ]

The ps3_mm_region_destroy() and ps3_mm_vas_destroy() routines
are called very late in the shutdown via kexec's mmu_cleanup_all
routine.  By the time mmu_cleanup_all runs it is too late to use
udbg_printf, and calling it will cause PS3 systems to hang.

Remove all debugging statements from ps3_mm_region_destroy() and
ps3_mm_vas_destroy() and replace any error reporting with calls
to lv1_panic.

With this change builds with 'DEBUG' defined will not cause kexec
reboots to hang, and builds with 'DEBUG' defined or not will end
in lv1_panic if an error is encountered.

Signed-off-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/7325c4af2b4c989c19d6a26b90b1fec9c0615ddf.1589049250.git.geoff@infradead.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/powerpc/platforms/ps3/mm.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index b0f34663b1ae..19bae78b1f25 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -212,13 +212,14 @@ void ps3_mm_vas_destroy(void)
 {
 	int result;
 
-	DBG("%s:%d: map.vas_id    = %llu\n", __func__, __LINE__, map.vas_id);
-
 	if (map.vas_id) {
 		result = lv1_select_virtual_address_space(0);
-		BUG_ON(result);
-		result = lv1_destruct_virtual_address_space(map.vas_id);
-		BUG_ON(result);
+		result += lv1_destruct_virtual_address_space(map.vas_id);
+
+		if (result) {
+			lv1_panic(0);
+		}
+
 		map.vas_id = 0;
 	}
 }
@@ -316,19 +317,20 @@ static void ps3_mm_region_destroy(struct mem_region *r)
 	int result;
 
 	if (!r->destroy) {
-		pr_info("%s:%d: Not destroying high region: %llxh %llxh\n",
-			__func__, __LINE__, r->base, r->size);
 		return;
 	}
 
-	DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base);
-
 	if (r->base) {
 		result = lv1_release_memory(r->base);
-		BUG_ON(result);
+
+		if (result) {
+			lv1_panic(0);
+		}
+
 		r->size = r->base = r->offset = 0;
 		map.total = map.rm.size;
 	}
+
 	ps3_mm_set_repository_highmem(NULL);
 }
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.4 26/60] tty: hvc: Fix data abort due to race in hvc_open
From: Sasha Levin @ 2020-06-18  1:29 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Raghavendra Rao Ananta, Greg Kroah-Hartman, linuxppc-dev,
	Sasha Levin
In-Reply-To: <20200618013004.610532-1-sashal@kernel.org>

From: Raghavendra Rao Ananta <rananta@codeaurora.org>

[ Upstream commit e2bd1dcbe1aa34ff5570b3427c530e4332ecf0fe ]

Potentially, hvc_open() can be called in parallel when two tasks calls
open() on /dev/hvcX. In such a scenario, if the hp->ops->notifier_add()
callback in the function fails, where it sets the tty->driver_data to
NULL, the parallel hvc_open() can see this NULL and cause a memory abort.
Hence, serialize hvc_open and check if tty->private_data is NULL before
proceeding ahead.

The issue can be easily reproduced by launching two tasks simultaneously
that does nothing but open() and close() on /dev/hvcX.
For example:
$ ./simple_open_close /dev/hvc0 & ./simple_open_close /dev/hvc0 &

Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
Link: https://lore.kernel.org/r/20200428032601.22127-1-rananta@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index acf6d143c753..81f23af8beca 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -89,6 +89,8 @@ static LIST_HEAD(hvc_structs);
  */
 static DEFINE_SPINLOCK(hvc_structs_lock);
 
+/* Mutex to serialize hvc_open */
+static DEFINE_MUTEX(hvc_open_mutex);
 /*
  * This value is used to assign a tty->index value to a hvc_struct based
  * upon order of exposure via hvc_probe(), when we can not match it to
@@ -333,16 +335,24 @@ static int hvc_install(struct tty_driver *driver, struct tty_struct *tty)
  */
 static int hvc_open(struct tty_struct *tty, struct file * filp)
 {
-	struct hvc_struct *hp = tty->driver_data;
+	struct hvc_struct *hp;
 	unsigned long flags;
 	int rc = 0;
 
+	mutex_lock(&hvc_open_mutex);
+
+	hp = tty->driver_data;
+	if (!hp) {
+		rc = -EIO;
+		goto out;
+	}
+
 	spin_lock_irqsave(&hp->port.lock, flags);
 	/* Check and then increment for fast path open. */
 	if (hp->port.count++ > 0) {
 		spin_unlock_irqrestore(&hp->port.lock, flags);
 		hvc_kick();
-		return 0;
+		goto out;
 	} /* else count == 0 */
 	spin_unlock_irqrestore(&hp->port.lock, flags);
 
@@ -371,6 +381,8 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 	/* Force wakeup of the polling thread */
 	hvc_kick();
 
+out:
+	mutex_unlock(&hvc_open_mutex);
 	return rc;
 }
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.4 21/60] scsi: ibmvscsi: Don't send host info in adapter info MAD after LPM
From: Sasha Levin @ 2020-06-18  1:29 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, Tyrel Datwyler, linuxppc-dev, linux-scsi,
	Martin K . Petersen
In-Reply-To: <20200618013004.610532-1-sashal@kernel.org>

From: Tyrel Datwyler <tyreld@linux.ibm.com>

[ Upstream commit 4919b33b63c8b69d8dcf2b867431d0e3b6dc6d28 ]

The adapter info MAD is used to send the client info and receive the host
info as a response. A persistent buffer is used and as such the client info
is overwritten after the response. During the course of a normal adapter
reset the client info is refreshed in the buffer in preparation for sending
the adapter info MAD.

However, in the special case of LPM where we reenable the CRQ instead of a
full CRQ teardown and reset we fail to refresh the client info in the
adapter info buffer. As a result, after Live Partition Migration (LPM) we
erroneously report the host's info as our own.

[mkp: typos]

Link: https://lore.kernel.org/r/20200603203632.18426-1-tyreld@linux.ibm.com
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/ibmvscsi/ibmvscsi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index e26747a1b35a..e7075aae15da 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -427,6 +427,8 @@ static int ibmvscsi_reenable_crq_queue(struct crq_queue *queue,
 	int rc = 0;
 	struct vio_dev *vdev = to_vio_dev(hostdata->dev);

+	set_adapter_info(hostdata);
+
 	/* Re-enable the CRQ */
 	do {
 		if (rc)
-- 
2.25.1

^ permalink raw reply related

* [PATCH AUTOSEL 4.4 16/60] powerpc/crashkernel: Take "mem=" option into account
From: Sasha Levin @ 2020-06-18  1:29 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Sasha Levin, linuxppc-dev, Hari Bathini, Pingfan Liu
In-Reply-To: <20200618013004.610532-1-sashal@kernel.org>

From: Pingfan Liu <kernelfans@gmail.com>

[ Upstream commit be5470e0c285a68dc3afdea965032f5ddc8269d7 ]

'mem=" option is an easy way to put high pressure on memory during
some test. Hence after applying the memory limit, instead of total
mem, the actual usable memory should be considered when reserving mem
for crashkernel. Otherwise the boot up may experience OOM issue.

E.g. it would reserve 4G prior to the change and 512M afterward, if
passing
crashkernel="2G-4G:384M,4G-16G:512M,16G-64G:1G,64G-128G:2G,128G-:4G",
and mem=5G on a 256G machine.

This issue is powerpc specific because it puts higher priority on
fadump and kdump reservation than on "mem=". Referring the following
code:
    if (fadump_reserve_mem() == 0)
            reserve_crashkernel();
    ...
    /* Ensure that total memory size is page-aligned. */
    limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
    memblock_enforce_memory_limit(limit);

While on other arches, the effect of "mem=" takes a higher priority
and pass through memblock_phys_mem_size() before calling
reserve_crashkernel().

Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Reviewed-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1585749644-4148-1-git-send-email-kernelfans@gmail.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/powerpc/kernel/machine_kexec.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 8dff2b371219..a14d9b008f74 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -113,11 +113,12 @@ void machine_kexec(struct kimage *image)
 
 void __init reserve_crashkernel(void)
 {
-	unsigned long long crash_size, crash_base;
+	unsigned long long crash_size, crash_base, total_mem_sz;
 	int ret;
 
+	total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size();
 	/* use common parsing */
-	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+	ret = parse_crashkernel(boot_command_line, total_mem_sz,
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size > 0) {
 		crashk_res.start = crash_base;
@@ -176,6 +177,7 @@ void __init reserve_crashkernel(void)
 	/* Crash kernel trumps memory limit */
 	if (memory_limit && memory_limit <= crashk_res.end) {
 		memory_limit = crashk_res.end + 1;
+		total_mem_sz = memory_limit;
 		printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
 		       memory_limit);
 	}
@@ -184,7 +186,7 @@ void __init reserve_crashkernel(void)
 			"for crashkernel (System RAM: %ldMB)\n",
 			(unsigned long)(crash_size >> 20),
 			(unsigned long)(crashk_res.start >> 20),
-			(unsigned long)(memblock_phys_mem_size() >> 20));
+			(unsigned long)(total_mem_sz >> 20));
 
 	if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
 	    memblock_reserve(crashk_res.start, crash_size)) {
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.4 14/60] powerpc/pseries: Update hv-24x7 information after migration
From: Sasha Levin @ 2020-06-18  1:29 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Sasha Levin, Kajol Jain, linuxppc-dev
In-Reply-To: <20200618013004.610532-1-sashal@kernel.org>

From: Kajol Jain <kjain@linux.ibm.com>

[ Upstream commit 373b373053384f12951ae9f916043d955501d482 ]

Function 'read_sys_info_pseries()' is added to get system parameter
values like number of sockets and chips per socket.
and it gets these details via rtas_call with token
"PROCESSOR_MODULE_INFO".

Incase lpar migrate from one system to another, system
parameter details like chips per sockets or number of sockets might
change. So, it needs to be re-initialized otherwise, these values
corresponds to previous system values.
This patch adds a call to 'read_sys_info_pseries()' from
'post-mobility_fixup()' to re-init the physsockets and physchips values

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200525104308.9814-6-kjain@linux.ibm.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/powerpc/platforms/pseries/mobility.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 8d30a425a88a..58ddc4389a51 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -326,6 +326,9 @@ void post_mobility_fixup(void)
 	/* Possibly switch to a new RFI flush type */
 	pseries_setup_rfi_flush();

+	/* Reinitialise system information for hv-24x7 */
+	read_24x7_sys_info();
+
 	return;
 }

-- 
2.25.1

^ permalink raw reply related

* [PATCH AUTOSEL 4.4 10/60] ps3disk: use the default segment boundary
From: Sasha Levin @ 2020-06-18  1:29 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, Emmanuel Nicolet, Geoff Levand, linux-block,
	linuxppc-dev
In-Reply-To: <20200618013004.610532-1-sashal@kernel.org>

From: Emmanuel Nicolet <emmanuel.nicolet@gmail.com>

[ Upstream commit 720bc316690bd27dea9d71510b50f0cd698ffc32 ]

Since commit dcebd755926b ("block: use bio_for_each_bvec() to compute
multi-page bvec count"), the kernel will bug_on on the PS3 because
bio_split() is called with sectors == 0:

  kernel BUG at block/bio.c:1853!
  Oops: Exception in kernel mode, sig: 5 [#1]
  BE PAGE_SIZE=4K MMU=Hash PREEMPT SMP NR_CPUS=8 NUMA PS3
  Modules linked in: firewire_sbp2 rtc_ps3(+) soundcore ps3_gelic(+) \
  ps3rom(+) firewire_core ps3vram(+) usb_common crc_itu_t
  CPU: 0 PID: 97 Comm: blkid Not tainted 5.3.0-rc4 #1
  NIP:  c00000000027d0d0 LR: c00000000027d0b0 CTR: 0000000000000000
  REGS: c00000000135ae90 TRAP: 0700   Not tainted  (5.3.0-rc4)
  MSR:  8000000000028032 <SF,EE,IR,DR,RI>  CR: 44008240  XER: 20000000
  IRQMASK: 0
  GPR00: c000000000289368 c00000000135b120 c00000000084a500 c000000004ff8300
  GPR04: 0000000000000c00 c000000004c905e0 c000000004c905e0 000000000000ffff
  GPR08: 0000000000000000 0000000000000001 0000000000000000 000000000000ffff
  GPR12: 0000000000000000 c0000000008ef000 000000000000003e 0000000000080001
  GPR16: 0000000000000100 000000000000ffff 0000000000000000 0000000000000004
  GPR20: c00000000062fd7e 0000000000000001 000000000000ffff 0000000000000080
  GPR24: c000000000781788 c00000000135b350 0000000000000080 c000000004c905e0
  GPR28: c00000000135b348 c000000004ff8300 0000000000000000 c000000004c90000
  NIP [c00000000027d0d0] .bio_split+0x28/0xac
  LR [c00000000027d0b0] .bio_split+0x8/0xac
  Call Trace:
  [c00000000135b120] [c00000000027d130] .bio_split+0x88/0xac (unreliable)
  [c00000000135b1b0] [c000000000289368] .__blk_queue_split+0x11c/0x53c
  [c00000000135b2d0] [c00000000028f614] .blk_mq_make_request+0x80/0x7d4
  [c00000000135b3d0] [c000000000283a8c] .generic_make_request+0x118/0x294
  [c00000000135b4b0] [c000000000283d34] .submit_bio+0x12c/0x174
  [c00000000135b580] [c000000000205a44] .mpage_bio_submit+0x3c/0x4c
  [c00000000135b600] [c000000000206184] .mpage_readpages+0xa4/0x184
  [c00000000135b750] [c0000000001ff8fc] .blkdev_readpages+0x24/0x38
  [c00000000135b7c0] [c0000000001589f0] .read_pages+0x6c/0x1a8
  [c00000000135b8b0] [c000000000158c74] .__do_page_cache_readahead+0x118/0x184
  [c00000000135b9b0] [c0000000001591a8] .force_page_cache_readahead+0xe4/0xe8
  [c00000000135ba50] [c00000000014fc24] .generic_file_read_iter+0x1d8/0x830
  [c00000000135bb50] [c0000000001ffadc] .blkdev_read_iter+0x40/0x5c
  [c00000000135bbc0] [c0000000001b9e00] .new_sync_read+0x144/0x1a0
  [c00000000135bcd0] [c0000000001bc454] .vfs_read+0xa0/0x124
  [c00000000135bd70] [c0000000001bc7a4] .ksys_read+0x70/0xd8
  [c00000000135be20] [c00000000000a524] system_call+0x5c/0x70
  Instruction dump:
  7fe3fb78 482e30dc 7c0802a6 482e3085 7c9e2378 f821ff71 7ca42b78 7d3e00d0
  7c7d1b78 79290fe0 7cc53378 69290001 <0b090000> 81230028 7bca0020 7929ba62
  [ end trace 313fec760f30aa1f ]---

The problem originates from setting the segment boundary of the
request queue to -1UL. This makes get_max_segment_size() return zero
when offset is zero, whatever the max segment size. The test with
BLK_SEG_BOUNDARY_MASK fails and 'mask - (mask & offset) + 1' overflows
to zero in the return statement.

Not setting the segment boundary and using the default
value (BLK_SEG_BOUNDARY_MASK) fixes the problem.

Signed-off-by: Emmanuel Nicolet <emmanuel.nicolet@gmail.com>
Signed-off-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/060a416c43138f45105c0540eff1a45539f7e2fc.1589049250.git.geoff@infradead.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/block/ps3disk.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index c120d70d3fb3..fc7a20286090 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -464,7 +464,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
 	blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH);
 
 	blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
-	blk_queue_segment_boundary(queue, -1UL);
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
 	blk_queue_logical_block_size(queue, dev->blk_size);
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.9 69/80] ASoC: fsl_asrc_dma: Fix dma_chan leak when config DMA channel failed
From: Sasha Levin @ 2020-06-18  1:28 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, Xin Tan, Xiyu Yang, alsa-devel, Mark Brown,
	linuxppc-dev
In-Reply-To: <20200618012819.609778-1-sashal@kernel.org>

From: Xiyu Yang <xiyuyang19@fudan.edu.cn>

[ Upstream commit 36124fb19f1ae68a500cd76a76d40c6e81bee346 ]

fsl_asrc_dma_hw_params() invokes dma_request_channel() or
fsl_asrc_get_dma_channel(), which returns a reference of the specified
dma_chan object to "pair->dma_chan[dir]" with increased refcnt.

The reference counting issue happens in one exception handling path of
fsl_asrc_dma_hw_params(). When config DMA channel failed for Back-End,
the function forgets to decrease the refcnt increased by
dma_request_channel() or fsl_asrc_get_dma_channel(), causing a refcnt
leak.

Fix this issue by calling dma_release_channel() when config DMA channel
failed.

Signed-off-by: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Signed-off-by: Xin Tan <tanxin.ctf@gmail.com>
Link: https://lore.kernel.org/r/1590415966-52416-1-git-send-email-xiyuyang19@fudan.edu.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 sound/soc/fsl/fsl_asrc_dma.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c
index dc30d780f874..3fcf174b99d3 100644
--- a/sound/soc/fsl/fsl_asrc_dma.c
+++ b/sound/soc/fsl/fsl_asrc_dma.c
@@ -243,6 +243,7 @@ static int fsl_asrc_dma_hw_params(struct snd_pcm_substream *substream,
 	ret = dmaengine_slave_config(pair->dma_chan[dir], &config_be);
 	if (ret) {
 		dev_err(dev, "failed to config DMA channel for Back-End\n");
+		dma_release_channel(pair->dma_chan[dir]);
 		return ret;
 	}
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.9 55/80] powerpc/64s/pgtable: fix an undefined behaviour
From: Sasha Levin @ 2020-06-18  1:27 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Christophe Leroy, Qian Cai, linuxppc-dev, Sasha Levin
In-Reply-To: <20200618012819.609778-1-sashal@kernel.org>

From: Qian Cai <cai@lca.pw>

[ Upstream commit c2e929b18cea6cbf71364f22d742d9aad7f4677a ]

Booting a power9 server with hash MMU could trigger an undefined
behaviour because pud_offset(p4d, 0) will do,

0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10)

Fix it by converting pud_index() and friends to static inline
functions.

UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15
shift exponent 34 is too large for 32-bit type 'int'
CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13
Call Trace:
dump_stack+0xf4/0x164 (unreliable)
ubsan_epilogue+0x18/0x78
__ubsan_handle_shift_out_of_bounds+0x160/0x21c
walk_pagetables+0x2cc/0x700
walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282
(inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311
ptdump_check_wx+0x8c/0xf0
mark_rodata_ro+0x48/0x80
kernel_init+0x74/0x194
ret_from_kernel_thread+0x5c/0x74

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
Link: https://lore.kernel.org/r/20200306044852.3236-1-cai@lca.pw
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 23 ++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9fd77f8794a0..315758c84187 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -754,10 +754,25 @@ extern struct page *pgd_page(pgd_t pgd);
 #define pud_page_vaddr(pud)	__va(pud_val(pud) & ~PUD_MASKED_BITS)
 #define pgd_page_vaddr(pgd)	__va(pgd_val(pgd) & ~PGD_MASKED_BITS)
 
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
-#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
-#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
-#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
+static inline unsigned long pgd_index(unsigned long address)
+{
+	return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
+}
+
+static inline unsigned long pud_index(unsigned long address)
+{
+	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
+static inline unsigned long pmd_index(unsigned long address)
+{
+	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+}
+
+static inline unsigned long pte_index(unsigned long address)
+{
+	return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+}
 
 /*
  * Find an entry in a page-table-directory.  We combine the address region
-- 
2.25.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox